Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CORE][CACHE][HASH] DYNAMIC_CODE_POLICY check on Windows #29006

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/common/util/include/openvino/util/common_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,5 +180,13 @@ constexpr std::array<std::conditional_t<std::is_void_v<T>, std::common_type_t<Ar
return {std::forward<Args>(args)...};
}

#if defined(_WIN32)
/// @brief Windows-only query telling callers whether the current process may use dynamically generated code.
/// @return false when the process mitigation policy ProcessDynamicCodePolicy reports ProhibitDynamicCode as set,
///         true otherwise.
bool may_i_use_dynamic_code();
nshchego marked this conversation as resolved.
Show resolved Hide resolved
#else
/// @brief Non-Windows counterpart of the dynamic code policy check.
/// @return Always true: no policy is queried on these platforms, so the check never blocks callers.
constexpr bool may_i_use_dynamic_code() {
    constexpr bool dynamic_code_allowed = true;
    return dynamic_code_allowed;
}
#endif

} // namespace util
} // namespace ov
13 changes: 13 additions & 0 deletions src/common/util/src/common_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

#include <algorithm>

#if defined(_WIN32)
# include <windows.h>
#endif

std::string ov::util::to_lower(const std::string& s) {
std::string rc = s;
std::transform(rc.begin(), rc.end(), rc.begin(), ::tolower);
Expand Down Expand Up @@ -60,3 +64,12 @@ std::string ov::util::filter_lines_by_prefix(const std::string& str, const std::
}
return res.str();
}

#if defined(_WIN32)
bool ov::util::may_i_use_dynamic_code() {
HANDLE handle = GetCurrentProcess();
PROCESS_MITIGATION_DYNAMIC_CODE_POLICY dynamic_code_policy = {0};
GetProcessMitigationPolicy(handle, ProcessDynamicCodePolicy, &dynamic_code_policy, sizeof(dynamic_code_policy));
return dynamic_code_policy.ProhibitDynamicCode != TRUE;
}
#endif
28 changes: 16 additions & 12 deletions src/core/reference/src/op/convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#ifdef OV_CORE_USE_XBYAK_JIT
# include "openvino/reference/utils/jit_generator.hpp"
# include "openvino/util/common_util.hpp"
#endif

#ifdef OV_CORE_USE_INTRINSICS
Expand Down Expand Up @@ -480,14 +481,15 @@ class jit_count_out_of_range : public jit::Generator {
template <class Clamp, typename TI, typename TO>
void convert_impl(const TI* arg, TO* out, size_t count) {
#ifdef OV_CORE_USE_XBYAK_JIT
if (auto converter = jit_convert_array::get<TI, TO, Clamp::enabled>()) {
jit_convert_array::args_t args = {arg, out, count};
converter(&args);
} else
#endif
{
Converter<TI, TO>::template apply<Clamp>(arg, out, count);
if (util::may_i_use_dynamic_code()) {
if (auto converter = jit_convert_array::get<TI, TO, Clamp::enabled>()) {
jit_convert_array::args_t args = {arg, out, count};
converter(&args);
return;
}
}
#endif // OV_CORE_USE_XBYAK_JIT
Converter<TI, TO>::template apply<Clamp>(arg, out, count);
}
} // namespace

Expand Down Expand Up @@ -544,11 +546,13 @@ void convert_from_bf16_to_f16_with_clamp(const bfloat16* arg, float16* out, size

size_t count_out_of_f16_range(const float* arg, size_t count) {
#ifdef OV_CORE_USE_XBYAK_JIT
if (auto converter = jit_count_out_of_range::get<float, float16>()) {
size_t num_out_of_range = 0;
jit_count_out_of_range::args_t args = {arg, &num_out_of_range, count};
converter(&args);
return num_out_of_range;
if (util::may_i_use_dynamic_code()) {
if (auto converter = jit_count_out_of_range::get<float, float16>()) {
size_t num_out_of_range = 0;
jit_count_out_of_range::args_t args = {arg, &num_out_of_range, count};
converter(&args);
return num_out_of_range;
}
}
#endif // OV_CORE_USE_XBYAK_JIT
const auto is_out_of_f16_range = [](const float v) {
Expand Down
133 changes: 68 additions & 65 deletions src/core/src/runtime/compute_hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#ifdef OV_CORE_USE_XBYAK_JIT
# include "openvino/core/parallel.hpp"
# include "openvino/reference/utils/registers_pool.hpp"
# include "openvino/util/common_util.hpp"
#endif // OV_CORE_USE_XBYAK_JIT

namespace ov {
Expand Down Expand Up @@ -822,77 +823,79 @@ void ComputeHash<isa>::fold_to_64(const Vmm& v_dst) {

size_t compute_hash(const void* src, size_t size) {
#ifdef OV_CORE_USE_XBYAK_JIT
if (Generator::mayiuse(avx2)) {
uint64_t result = 0lu;

// Parallel section
constexpr uint64_t min_wa_per_thread = 131072lu; // 2^17
const uint64_t size_u64 = static_cast<uint64_t>(size);
if (size_u64 >= min_wa_per_thread * 2lu) {
static auto first_thr_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::FIRST_THREAD})
: jit::ComputeHash<avx2>::create({jit::FIRST_THREAD});
static auto n_thr_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::N_THREAD})
: jit::ComputeHash<avx2>::create({jit::N_THREAD});
static auto final_fold_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::FINAL_FOLD})
: jit::ComputeHash<avx2>::create({jit::FINAL_FOLD});

static const uint64_t max_thr_num = 2lu;
uint64_t thr_num = std::min(size_u64 / min_wa_per_thread, max_thr_num);
const uint64_t el_per_thread =
first_thr_kernel->get_vlen() * ((size_u64 / thr_num) / first_thr_kernel->get_vlen());
std::vector<uint8_t> intermediate(thr_num * first_thr_kernel->get_vlen());

parallel_nt_static(static_cast<int>(thr_num), [&](const int ithr, const int nthr) {
uint64_t start = el_per_thread * ithr;
if (start >= size_u64) {
return;
}
uint64_t work_amount = (el_per_thread + start > size_u64) ? size_u64 - start : el_per_thread;
if (util::may_i_use_dynamic_code()) {
if (Generator::mayiuse(avx2)) {
uint64_t result = 0lu;

// Parallel section
constexpr uint64_t min_wa_per_thread = 131072lu; // 2^17
const uint64_t size_u64 = static_cast<uint64_t>(size);
if (size_u64 >= min_wa_per_thread * 2lu) {
static auto first_thr_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::FIRST_THREAD})
: jit::ComputeHash<avx2>::create({jit::FIRST_THREAD});
static auto n_thr_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::N_THREAD})
: jit::ComputeHash<avx2>::create({jit::N_THREAD});
static auto final_fold_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::FINAL_FOLD})
: jit::ComputeHash<avx2>::create({jit::FINAL_FOLD});

static const uint64_t max_thr_num = 2lu;
uint64_t thr_num = std::min(size_u64 / min_wa_per_thread, max_thr_num);
const uint64_t el_per_thread =
first_thr_kernel->get_vlen() * ((size_u64 / thr_num) / first_thr_kernel->get_vlen());
std::vector<uint8_t> intermediate(thr_num * first_thr_kernel->get_vlen());

parallel_nt_static(static_cast<int>(thr_num), [&](const int ithr, const int nthr) {
uint64_t start = el_per_thread * ithr;
if (start >= size_u64) {
return;
}
uint64_t work_amount = (el_per_thread + start > size_u64) ? size_u64 - start : el_per_thread;

jit::ComputeHashCallArgs args;

args.src_ptr = reinterpret_cast<const uint8_t*>(src) + first_thr_kernel->get_vlen() * ithr;
args.dst_ptr = &(intermediate[first_thr_kernel->get_vlen() * ithr]);
args.k_ptr = jit::K_PULL;
args.work_amount = work_amount;
args.size = size_u64;
args.threads_num = thr_num;

if (ithr == 0) {
(*first_thr_kernel)(&args);
} else {
(*n_thr_kernel)(&args);
}
});

jit::ComputeHashCallArgs args;
args.work_amount = size_u64 - el_per_thread * thr_num;
args.src_ptr = reinterpret_cast<const uint8_t*>(src) + size_u64 - args.work_amount;
args.dst_ptr = &result;
args.k_ptr = jit::K_PULL;
args.size = size_u64;
args.intermediate_ptr = intermediate.data();

args.src_ptr = reinterpret_cast<const uint8_t*>(src) + first_thr_kernel->get_vlen() * ithr;
args.dst_ptr = &(intermediate[first_thr_kernel->get_vlen() * ithr]);
(*final_fold_kernel)(&args);
} else {
static auto single_thr_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::SINGLE_THREAD})
: jit::ComputeHash<avx2>::create({jit::SINGLE_THREAD});

jit::ComputeHashCallArgs args;
args.src_ptr = src;
args.dst_ptr = &result;
args.k_ptr = jit::K_PULL;
args.work_amount = work_amount;
args.work_amount = size_u64;
args.size = size_u64;
args.threads_num = thr_num;

if (ithr == 0) {
(*first_thr_kernel)(&args);
} else {
(*n_thr_kernel)(&args);
}
});

jit::ComputeHashCallArgs args;
args.work_amount = size_u64 - el_per_thread * thr_num;
args.src_ptr = reinterpret_cast<const uint8_t*>(src) + size_u64 - args.work_amount;
args.dst_ptr = &result;
args.k_ptr = jit::K_PULL;
args.size = size_u64;
args.intermediate_ptr = intermediate.data();

(*final_fold_kernel)(&args);
} else {
static auto single_thr_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::SINGLE_THREAD})
: jit::ComputeHash<avx2>::create({jit::SINGLE_THREAD});

jit::ComputeHashCallArgs args;
args.src_ptr = src;
args.dst_ptr = &result;
args.k_ptr = jit::K_PULL;
args.work_amount = size_u64;
args.size = size_u64;

(*single_thr_kernel)(&args);
}

return result;
(*single_thr_kernel)(&args);
}

return result;
}
}

#endif // OV_CORE_USE_XBYAK_JIT
Expand Down
Loading