From 42e0d0e36732081f943cb65b130967b74b08b91d Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 30 Aug 2023 17:29:11 +0200 Subject: [PATCH 01/16] Added gaussian distribution --- dpnp/CMakeLists.txt | 2 + dpnp/backend/extensions/rng/CMakeLists.txt | 74 +++++ .../extensions/rng/device/CMakeLists.txt | 75 +++++ .../extensions/rng/device/gaussian.cpp | 258 ++++++++++++++++++ .../extensions/rng/device/gaussian.hpp | 57 ++++ dpnp/backend/extensions/rng/device/rng_py.cpp | 87 ++++++ dpnp/backend/extensions/rng/distr_proxy.hpp | 115 ++++++++ dpnp/backend/extensions/rng/engine_proxy.hpp | 85 ++++++ dpnp/backend/extensions/rng/rng_py.cpp | 89 ++++++ 9 files changed, 842 insertions(+) create mode 100644 dpnp/backend/extensions/rng/CMakeLists.txt create mode 100644 dpnp/backend/extensions/rng/device/CMakeLists.txt create mode 100644 dpnp/backend/extensions/rng/device/gaussian.cpp create mode 100644 dpnp/backend/extensions/rng/device/gaussian.hpp create mode 100644 dpnp/backend/extensions/rng/device/rng_py.cpp create mode 100644 dpnp/backend/extensions/rng/distr_proxy.hpp create mode 100644 dpnp/backend/extensions/rng/engine_proxy.hpp create mode 100644 dpnp/backend/extensions/rng/rng_py.cpp diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt index 9c79d5af385e..2e531b5fafcf 100644 --- a/dpnp/CMakeLists.txt +++ b/dpnp/CMakeLists.txt @@ -58,6 +58,8 @@ build_dpnp_cython_ext_with_backend(dparray ${CMAKE_CURRENT_SOURCE_DIR}/dparray.p add_subdirectory(backend) add_subdirectory(backend/extensions/blas) add_subdirectory(backend/extensions/lapack) +add_subdirectory(backend/extensions/rng) +add_subdirectory(backend/extensions/rng/device) add_subdirectory(backend/extensions/vm) add_subdirectory(backend/extensions/sycl_ext) diff --git a/dpnp/backend/extensions/rng/CMakeLists.txt b/dpnp/backend/extensions/rng/CMakeLists.txt new file mode 100644 index 000000000000..a3821e96dcc7 --- /dev/null +++ b/dpnp/backend/extensions/rng/CMakeLists.txt @@ -0,0 +1,74 @@ +# ***************************************************************************** +# Copyright (c) 2023, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + + +set(python_module_name _rng_impl) +pybind11_add_module(${python_module_name} MODULE + rng_py.cpp +) + +if (WIN32) + if (${CMAKE_VERSION} VERSION_LESS "3.27") + # this is a work-around for target_link_options inserting option after -link option, cause + # linker to ignore it. + set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel") + endif() +endif() + +set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) + +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) + +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) + +if (WIN32) + target_compile_options(${python_module_name} PRIVATE + /clang:-fno-approx-func + /clang:-fno-finite-math-only + ) +else() + target_compile_options(${python_module_name} PRIVATE + -fno-approx-func + -fno-finite-math-only + ) +endif() + +target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) +if (UNIX) + # this option is support on Linux only + target_link_options(${python_module_name} PUBLIC -fsycl-link-huge-device-code) +endif() + +if (DPNP_GENERATE_COVERAGE) + target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) +endif() + +target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP) + +install(TARGETS ${python_module_name} + DESTINATION "dpnp/backend/extensions/rng" +) diff --git a/dpnp/backend/extensions/rng/device/CMakeLists.txt b/dpnp/backend/extensions/rng/device/CMakeLists.txt new file mode 100644 index 000000000000..8521ade5a6c8 --- /dev/null +++ b/dpnp/backend/extensions/rng/device/CMakeLists.txt @@ -0,0 +1,75 @@ +# ***************************************************************************** +# Copyright (c) 2023, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + + +set(python_module_name _rng_dev_impl) +pybind11_add_module(${python_module_name} MODULE + rng_py.cpp + gaussian.cpp +) + +if (WIN32) + if (${CMAKE_VERSION} VERSION_LESS "3.27") + # this is a work-around for target_link_options inserting option after -link option, cause + # linker to ignore it. + set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel") + endif() +endif() + +set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) + +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) + +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) + +if (WIN32) + target_compile_options(${python_module_name} PRIVATE + /clang:-fno-approx-func + /clang:-fno-finite-math-only + ) +else() + target_compile_options(${python_module_name} PRIVATE + -fno-approx-func + -fno-finite-math-only + ) +endif() + +target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) +if (UNIX) + # this option is support on Linux only + target_link_options(${python_module_name} PUBLIC -fsycl-link-huge-device-code) +endif() + +if (DPNP_GENERATE_COVERAGE) + target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) +endif() + +target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP) + +install(TARGETS ${python_module_name} + DESTINATION "dpnp/backend/extensions/rng/device" +) diff --git a/dpnp/backend/extensions/rng/device/gaussian.cpp b/dpnp/backend/extensions/rng/device/gaussian.cpp new file mode 100644 index 000000000000..8e03a9125c93 --- /dev/null +++ b/dpnp/backend/extensions/rng/device/gaussian.cpp @@ -0,0 +1,258 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +// dpctl tensor headers +#include "kernels/alignment.hpp" + +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + +#include "gaussian.hpp" + +// #include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace rng +{ +namespace device +{ +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +namespace mkl_rng_dev = oneapi::mkl::rng::device; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*gaussian_impl_fn_ptr_t)(sycl::queue &, + const std::uint32_t, + const double, + const double, + const std::uint64_t, + char *, + std::vector &, + const std::vector &); + +static gaussian_impl_fn_ptr_t gaussian_dispatch_vector[dpctl_td_ns::num_types]; + +// template +template +class gaussian_kernel; + +// template +template +static sycl::event gaussian_impl(sycl::queue& exec_q, + const std::uint32_t seed, + const double mean_val, + const double stddev_val, + const std::uint64_t n, + char *out_ptr, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + using Method = mkl_rng_dev::gaussian_method::by_default; + + const bool enable_sg_load = is_aligned(out_ptr); + DataType *out = reinterpret_cast(out_ptr); + DataType mean = static_cast(mean_val); + DataType stddev = static_cast(stddev_val); + + constexpr std::size_t vec_sz = 8; + constexpr std::size_t items_per_wi = 4; + constexpr std::size_t local_size = 256; + const std::size_t wg_items = local_size * vec_sz * items_per_wi; + const std::size_t global_size = ((n + wg_items - 1) / (wg_items)) * local_size; + + sycl::event distr_event; + + try { + distr_event = exec_q.parallel_for>( + sycl::nd_range<1>({global_size}, {local_size}), depends, + [=](sycl::nd_item<1> nd_it) + { + auto global_id = nd_it.get_global_id(); + + auto sg = nd_it.get_sub_group(); + const std::uint8_t sg_size = sg.get_local_range()[0]; + const std::uint8_t max_sg_size = sg.get_max_local_range()[0]; + const size_t base = items_per_wi * vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); + + mkl_rng_dev::gaussian distr(mean, stddev); + + if (enable_sg_load && (sg_size == max_sg_size) && (base + items_per_wi * vec_sz * sg_size < n)) { + auto engine = mkl_rng_dev::mrg32k3a(seed, n * global_id); + +#pragma unroll + for (std::uint16_t it = 0; it < items_per_wi * vec_sz; it += vec_sz) { + size_t offset = base + static_cast(it) * static_cast(sg_size); + auto out_multi_ptr = sycl::address_space_cast(&out[offset]); + + sycl::vec rng_val_vec = mkl_rng_dev::generate(distr, engine); + sg.store(out_multi_ptr, rng_val_vec); + } + } + else { + auto engine = mkl_rng_dev::mrg32k3a(seed, n * global_id); + + for (size_t k = base + sg.get_local_id()[0]; k < n; k += sg_size) { + out[k] = mkl_rng_dev::generate(distr, engine); + } + } + }); + } catch (oneapi::mkl::exception const &e) { + std::stringstream error_msg; + + error_msg << "Unexpected MKL exception caught during gaussian call:\nreason: " << e.what(); + throw std::runtime_error(error_msg.str()); + } + return distr_event; +} + +std::pair gaussian(sycl::queue exec_q, + const std::uint32_t seed, + const double mean, + const double stddev, + const std::uint64_t n, + dpctl::tensor::usm_ndarray res, + const std::vector &depends) +{ + const int res_nd = res.get_ndim(); + + // if (eig_vecs_nd != 2) { + // throw py::value_error("Unexpected ndim=" + std::to_string(eig_vecs_nd) + + // " of an output array with eigenvectors"); + // } + // else if (eig_vals_nd != 1) { + // throw py::value_error("Unexpected ndim=" + std::to_string(eig_vals_nd) + + // " of an output array with eigenvalues"); + // } + + const py::ssize_t *res_shape = res.get_shape_raw(); + + // if (eig_vecs_shape[0] != eig_vecs_shape[1]) { + // throw py::value_error("Output array with eigenvectors with be square"); + // } + // else if (eig_vecs_shape[0] != eig_vals_shape[0]) { + // throw py::value_error( + // "Eigenvectors and eigenvalues have different shapes"); + // } + + size_t src_nelems(1); + + for (int i = 0; i < res_nd; ++i) { + src_nelems *= static_cast(res_shape[i]); + } + + if (src_nelems == 0) { + // nothing to do + return std::make_pair(sycl::event(), sycl::event()); + } + + // check compatibility of execution queue and allocation queue + // if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals})) { + // throw py::value_error( + // "Execution queue is not compatible with allocation queues"); + // } + + // auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + // if (overlap(eig_vecs, eig_vals)) { + // throw py::value_error("Arrays with eigenvectors and eigenvalues are " + // "overlapping segments of memory"); + // } + + bool is_res_c_contig = res.is_c_contiguous(); + if (!is_res_c_contig) { + throw py::value_error( + "An array with input matrix must be C-contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int res_type_id = + array_types.typenum_to_lookup_id(res.get_typenum()); + + gaussian_impl_fn_ptr_t gaussian_fn = gaussian_dispatch_vector[res_type_id]; + if (gaussian_fn == nullptr) { + throw py::value_error("No gaussian implementation defined for a required type"); + } + + char *res_data = res.get_data(); + + std::vector host_task_events; + sycl::event gaussian_ev = + gaussian_fn(exec_q, seed, mean, stddev, n, res_data, + host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive( + exec_q, {res}, host_task_events); + return std::make_pair(args_ev, gaussian_ev); +} + +template +struct GaussianTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + +template +struct GaussianContigFactory +{ + fnT get() + { + if constexpr (GaussianTypePairSupportFactory::is_defined) { + return gaussian_impl; + } + else { + return nullptr; + } + } +}; + +void init_gaussian_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(gaussian_dispatch_vector); +} +} // namespace device +} // namespace rng +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/rng/device/gaussian.hpp b/dpnp/backend/extensions/rng/device/gaussian.hpp new file mode 100644 index 000000000000..adbc6697ad95 --- /dev/null +++ b/dpnp/backend/extensions/rng/device/gaussian.hpp @@ -0,0 +1,57 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace rng +{ +namespace device +{ +extern std::pair gaussian(sycl::queue exec_q, + const std::uint32_t seed, + const double mean, + const double stddev, + const std::uint64_t n, + dpctl::tensor::usm_ndarray res, + const std::vector &depends = {}); + +extern void init_gaussian_dispatch_vector(void); +} // namespace device +} // namespace rng +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/rng/device/rng_py.cpp b/dpnp/backend/extensions/rng/device/rng_py.cpp new file mode 100644 index 000000000000..dffabee5f267 --- /dev/null +++ b/dpnp/backend/extensions/rng/device/rng_py.cpp @@ -0,0 +1,87 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +// This file defines functions of dpnp.backend._rng_impl extensions +// +//***************************************************************************** + +#include +#include + +#include + +#include + +#include "gaussian.hpp" + +namespace mkl_rng = oneapi::mkl::rng; +namespace rng_dev_ext = dpnp::backend::ext::rng::device; +namespace py = pybind11; + +// populate dispatch vectors +void init_dispatch_vectors(void) +{ + rng_dev_ext::init_gaussian_dispatch_vector(); +} + +// populate dispatch tables +void init_dispatch_tables(void) +{ + // lapack_ext::init_heevd_dispatch_table(); +} + + +PYBIND11_MODULE(_rng_dev_impl, m) +{ + // using engine_base_t = rng_ext::EngineBase; + // py::class_ engine_base(m, "EngineBase"); + // engine_base.def(py::init()) + // .def("get_queue", &engine_base_t::get_queue); + + // using mt19937_engine_t = rng_ext::EngineProxy; + // py::class_(m, "mt19937", engine_base) + // .def(py::init()) + // .def(py::init>()); + + // using mcg59_engine_t = rng_ext::EngineProxy; + // py::class_(m, "mcg59", engine_base) + // .def(py::init()); + + init_dispatch_vectors(); + init_dispatch_tables(); + + // m.def("_heevd", &lapack_ext::heevd, + // "Call `heevd` from OneMKL LAPACK library to return " + // "the eigenvalues and eigenvectors of a complex Hermitian matrix", + // py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"), + // py::arg("eig_vecs"), py::arg("eig_vals"), + // py::arg("depends") = py::list()); + + m.def("_gaussian", &rng_dev_ext::gaussian, + "", + py::arg("sycl_queue"), py::arg("seed"), py::arg("mean"), py::arg("stddev"), + py::arg("n"), py::arg("res"), + py::arg("depends") = py::list()); +} diff --git a/dpnp/backend/extensions/rng/distr_proxy.hpp b/dpnp/backend/extensions/rng/distr_proxy.hpp new file mode 100644 index 000000000000..69c895f0608d --- /dev/null +++ b/dpnp/backend/extensions/rng/distr_proxy.hpp @@ -0,0 +1,115 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace rng +{ +namespace mkl_rng = oneapi::mkl::rng; + +template +class UnifromBase { +public: + using method_type = Method; + using result_type = DataType; + using distr_type = mkl_rng::uniform; + + + UnifromBase(DataType a, DataType b) { + distr = std::make_unique(a, b); + } + + distr_type& get_distr() const { return *distr; } + +private: + std::unique_ptr distr; +} + +class UniformB { +public: + + Uniform(sycl::queue queue, const int type_num, double a, double b): q(queue) { + if () + } + + void generate() { + // create distribution + } + +private: + int typenum; + std::unique_ptr engine; + + + + using method_type = Method; + // using result_type = RealType; + + DistrProxy(sycl::queue queue, std::uint32_t seed): q(queue) { + engine = std::make_unique(queue, seed); + }; + + // template + DistrProxy(sycl::queue queue, std::vector vec_seed): q(queue) { + switch (vec_seed.size()) { + case 1: + engine = std::make_unique(queue, std::initializer_list({vec_seed[0]})); + break; + case 2: + engine = std::make_unique(queue, std::initializer_list({vec_seed[0], vec_seed[1]})); + break; + case 3: + engine = std::make_unique(queue, std::initializer_list({vec_seed[0], vec_seed[1], vec_seed[2]})); + break; + default: + // TODO need to get rid of the limitation for seed vector length + throw std::runtime_error("Too long seed vector"); + } + }; + + ~DistrProxy() = default; + + sycl::queue& get_queue() { return q;} + EngineT& get_engine() const { return *engine;} + +private: + sycl::queue q; + std::unique_ptr engine; + // engine_t engine; + // sycl::queue q; + +}; +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace rng diff --git a/dpnp/backend/extensions/rng/engine_proxy.hpp b/dpnp/backend/extensions/rng/engine_proxy.hpp new file mode 100644 index 000000000000..89346718feff --- /dev/null +++ b/dpnp/backend/extensions/rng/engine_proxy.hpp @@ -0,0 +1,85 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace rng +{ +class EngineBase { +public: + EngineBase(sycl::queue queue) { + q = std::make_unique(queue); + }; + + sycl::queue& get_queue() { return *q; } + +private: + std::unique_ptr q; +}; + + +template +class EngineProxy: public EngineBase { +public: + using engine_t = EngineT; + + EngineProxy(sycl::queue queue, SeedT seed): EngineBase(queue) { + engine = std::make_unique(queue, seed); + }; + + // template + EngineProxy(sycl::queue queue, std::vector vec_seed): EngineBase(queue) { + switch (vec_seed.size()) { + case 1: + engine = std::make_unique(queue, std::initializer_list({vec_seed[0]})); + break; + case 2: + engine = std::make_unique(queue, std::initializer_list({vec_seed[0], vec_seed[1]})); + break; + case 3: + engine = std::make_unique(queue, std::initializer_list({vec_seed[0], vec_seed[1], vec_seed[2]})); + break; + default: + // TODO need to get rid of the limitation for seed vector length + throw std::runtime_error("Too long seed vector"); + } + }; + + // ~EngineProxy() = default; + +private: + std::unique_ptr engine; + +}; +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace rng diff --git a/dpnp/backend/extensions/rng/rng_py.cpp b/dpnp/backend/extensions/rng/rng_py.cpp new file mode 100644 index 000000000000..a360f3c8140e --- /dev/null +++ b/dpnp/backend/extensions/rng/rng_py.cpp @@ -0,0 +1,89 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +// This file defines functions of dpnp.backend._rng_impl extensions +// +//***************************************************************************** + +#include +#include + +#include + +#include + +#include "engine_proxy.hpp" +// #include "syevd.hpp" + +namespace mkl_rng = oneapi::mkl::rng; +namespace rng_ext = dpnp::backend::ext::rng; +namespace py = pybind11; + +// populate dispatch vectors +void init_dispatch_vectors(void) +{ + // lapack_ext::init_syevd_dispatch_vector(); +} + +// populate dispatch tables +void init_dispatch_tables(void) +{ + // lapack_ext::init_heevd_dispatch_table(); +} + + +PYBIND11_MODULE(_rng_impl, m) +{ + using engine_base_t = rng_ext::EngineBase; + py::class_ engine_base(m, "EngineBase"); + engine_base.def(py::init()) + .def("get_queue", &engine_base_t::get_queue); + + using mt19937_engine_t = rng_ext::EngineProxy; + py::class_(m, "mt19937", engine_base) + .def(py::init()) + .def(py::init>()); + + using mcg59_engine_t = rng_ext::EngineProxy; + py::class_(m, "mcg59", engine_base) + .def(py::init()); + + // init_dispatch_vectors(); + // init_dispatch_tables(); + + // m.def("_heevd", &lapack_ext::heevd, + // "Call `heevd` from OneMKL LAPACK library to return " + // "the eigenvalues and eigenvectors of a complex Hermitian matrix", + // py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"), + // py::arg("eig_vecs"), py::arg("eig_vals"), + // py::arg("depends") = py::list()); + + // m.def("_syevd", &lapack_ext::syevd, + // "Call `syevd` from OneMKL LAPACK library to return " + // "the eigenvalues and eigenvectors of a real symmetric matrix", + // py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"), + // py::arg("eig_vecs"), py::arg("eig_vals"), + // py::arg("depends") = py::list()); +} From 97fc0473dca35bec86eb21bd6cb694b98a84d0fd Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 18 Jan 2024 16:17:02 +0100 Subject: [PATCH 02/16] Added strided kernel --- .../extensions/rng/device/common_impl.hpp | 158 ++++++++++ .../extensions/rng/device/gaussian.cpp | 283 +++++++++++++----- 2 files changed, 366 insertions(+), 75 deletions(-) create mode 100644 dpnp/backend/extensions/rng/device/common_impl.hpp diff --git a/dpnp/backend/extensions/rng/device/common_impl.hpp b/dpnp/backend/extensions/rng/device/common_impl.hpp new file mode 100644 index 000000000000..a59dadbeee3b --- /dev/null +++ b/dpnp/backend/extensions/rng/device/common_impl.hpp @@ -0,0 +1,158 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include +#include + +// dpctl tensor headers +#include "kernels/alignment.hpp" +#include "utils/offset_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace rng +{ +namespace device +{ +namespace details +{ +namespace py = pybind11; + +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + +namespace mkl_rng_dev = oneapi::mkl::rng::device; + +/*! @brief Functor for unary function evaluation on contiguous array */ +template +struct RngContigFunctor +{ +private: + const std::uint32_t seed_; + const DataT mean_; + const DataT stddev_; + ResT *res_ = nullptr; + const size_t nelems_; + +public: + RngContigFunctor(const std::uint32_t seed, const DataT mean, const DataT stddev, ResT *res, const size_t n_elems) + : seed_(seed), mean_(mean), stddev_(stddev), res_(res), nelems_(n_elems) + { + } + + void operator()(sycl::nd_item<1> nd_it) const + { + auto global_id = nd_it.get_global_id(); + + auto sg = nd_it.get_sub_group(); + const std::uint8_t sg_size = sg.get_local_range()[0]; + const std::uint8_t max_sg_size = sg.get_max_local_range()[0]; + + auto engine = mkl_rng_dev::mrg32k3a(seed_, nelems_ * global_id); + mkl_rng_dev::gaussian distr(mean_, stddev_); + + if (enable_sg_load) { + const size_t base = items_per_wi * vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); + + if ((sg_size == max_sg_size) && (base + items_per_wi * vec_sz * sg_size < nelems_)) { +#pragma unroll + for (std::uint16_t it = 0; it < items_per_wi * vec_sz; it += vec_sz) { + size_t offset = base + static_cast(it) * static_cast(sg_size); + auto out_multi_ptr = sycl::address_space_cast(&res_[offset]); + + sycl::vec rng_val_vec = mkl_rng_dev::generate(distr, engine); + sg.store(out_multi_ptr, rng_val_vec); + } + } + else { + for (size_t offset = base + sg.get_local_id()[0]; offset < nelems_; offset += sg_size) { + res_[offset] = mkl_rng_dev::generate_single(distr, engine); + } + } + } + else { + size_t base = nd_it.get_global_linear_id(); + + base = (base / sg_size) * sg_size * items_per_wi * vec_sz + (base % sg_size); + for (size_t offset = base; offset < std::min(nelems_, base + sg_size * (items_per_wi * vec_sz)); offset += sg_size) + { + res_[offset] = mkl_rng_dev::generate_single(distr, engine); + } + } + } +}; + +template +struct RngStridedFunctor +{ +private: + const std::uint32_t seed_; + const double mean_; + const double stddev_; + ResT *res_ = nullptr; + IndexerT out_indexer_; + +public: + RngStridedFunctor(const std::uint32_t seed, const double mean, const double stddev, ResT *res_p, IndexerT out_indexer) + : seed_(seed), mean_(mean), stddev_(stddev), res_(res_p), out_indexer_(out_indexer) + { + } + + void operator()(sycl::id<1> wid) const + { + const auto res_offset = out_indexer_(wid.get(0)); + + // UnaryOpT op{}; + + auto engine = mkl_rng_dev::mrg32k3a(seed_); + mkl_rng_dev::gaussian distr(mean_, stddev_); + + res_[res_offset] = mkl_rng_dev::generate(distr, engine); + } +}; +} // namespace details +} // namespace device +} // namespace rng +} // namespace ext +} // namespace backend +} // namespace dpnp \ No newline at end of file diff --git a/dpnp/backend/extensions/rng/device/gaussian.cpp b/dpnp/backend/extensions/rng/device/gaussian.cpp index 8e03a9125c93..382108768a18 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.cpp +++ b/dpnp/backend/extensions/rng/device/gaussian.cpp @@ -33,9 +33,11 @@ // dpctl tensor headers #include "kernels/alignment.hpp" +using dpctl::tensor::kernels::alignment_utils::disabled_sg_loadstore_wrapper_krn; using dpctl::tensor::kernels::alignment_utils::is_aligned; using dpctl::tensor::kernels::alignment_utils::required_alignment; +#include "common_impl.hpp" #include "gaussian.hpp" // #include "dpnp_utils.hpp" @@ -55,40 +57,51 @@ namespace mkl_rng_dev = oneapi::mkl::rng::device; namespace py = pybind11; namespace type_utils = dpctl::tensor::type_utils; -typedef sycl::event (*gaussian_impl_fn_ptr_t)(sycl::queue &, +typedef sycl::event (*gaussian_contig_impl_fn_ptr_t)(sycl::queue &, const std::uint32_t, const double, const double, const std::uint64_t, char *, - std::vector &, const std::vector &); -static gaussian_impl_fn_ptr_t gaussian_dispatch_vector[dpctl_td_ns::num_types]; -// template -template -class gaussian_kernel; +typedef sycl::event (*gaussian_strided_impl_fn_ptr_t)(sycl::queue &, + const std::uint32_t, + const double, + const double, + const std::uint64_t size_t, + int, + const py::ssize_t *, + char *, + py::ssize_t, + const std::vector &, + const std::vector &); + +// static gaussian_impl_fn_ptr_t gaussian_dispatch_vector[dpctl_td_ns::num_types]; -// template -template -static sycl::event gaussian_impl(sycl::queue& exec_q, +static gaussian_contig_impl_fn_ptr_t gaussian_contig_dispatch_vector[dpctl_td_ns::num_types]; +static gaussian_strided_impl_fn_ptr_t gaussian_strided_dispatch_vector[dpctl_td_ns::num_types]; + +// template +template +class gaussian_contig_kernel; + +template +static sycl::event gaussian_contig_impl(sycl::queue& exec_q, const std::uint32_t seed, const double mean_val, const double stddev_val, const std::uint64_t n, char *out_ptr, - std::vector &host_task_events, const std::vector &depends) { - type_utils::validate_type_for_device(exec_q); + type_utils::validate_type_for_device(exec_q); - using Method = mkl_rng_dev::gaussian_method::by_default; - - const bool enable_sg_load = is_aligned(out_ptr); - DataType *out = reinterpret_cast(out_ptr); - DataType mean = static_cast(mean_val); - DataType stddev = static_cast(stddev_val); + // const bool enable_sg_load = is_aligned(out_ptr); + DataT *out = reinterpret_cast(out_ptr); + DataT mean = static_cast(mean_val); + DataT stddev = static_cast(stddev_val); constexpr std::size_t vec_sz = 8; constexpr std::size_t items_per_wi = 4; @@ -99,44 +112,135 @@ static sycl::event gaussian_impl(sycl::queue& exec_q, sycl::event distr_event; try { - distr_event = exec_q.parallel_for>( - sycl::nd_range<1>({global_size}, {local_size}), depends, - [=](sycl::nd_item<1> nd_it) - { - auto global_id = nd_it.get_global_id(); - - auto sg = nd_it.get_sub_group(); - const std::uint8_t sg_size = sg.get_local_range()[0]; - const std::uint8_t max_sg_size = sg.get_max_local_range()[0]; - const size_t base = items_per_wi * vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); - - mkl_rng_dev::gaussian distr(mean, stddev); - - if (enable_sg_load && (sg_size == max_sg_size) && (base + items_per_wi * vec_sz * sg_size < n)) { - auto engine = mkl_rng_dev::mrg32k3a(seed, n * global_id); - -#pragma unroll - for (std::uint16_t it = 0; it < items_per_wi * vec_sz; it += vec_sz) { - size_t offset = base + static_cast(it) * static_cast(sg_size); - auto out_multi_ptr = sycl::address_space_cast(&out[offset]); - - sycl::vec rng_val_vec = mkl_rng_dev::generate(distr, engine); - sg.store(out_multi_ptr, rng_val_vec); - } - } - else { - auto engine = mkl_rng_dev::mrg32k3a(seed, n * global_id); - - for (size_t k = base + sg.get_local_id()[0]; k < n; k += sg_size) { - out[k] = mkl_rng_dev::generate(distr, engine); - } - } + distr_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + if (is_aligned(out_ptr)) { + constexpr bool enable_sg_load = true; + using KernelName = gaussian_contig_kernel; + + cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), + details::RngContigFunctor(seed, mean, stddev, out, n)); + } + else { + constexpr bool disable_sg_load = false; + using InnerKernelName = gaussian_contig_kernel; + using KernelName = disabled_sg_loadstore_wrapper_krn; + + cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), + details::RngContigFunctor(seed, mean, stddev, out, n)); + } }); + +// distr_event = exec_q.parallel_for>( +// sycl::nd_range<1>({global_size}, {local_size}), depends, +// [=](sycl::nd_item<1> nd_it) +// { +// auto global_id = nd_it.get_global_id(); + +// auto sg = nd_it.get_sub_group(); +// const std::uint8_t sg_size = sg.get_local_range()[0]; +// const std::uint8_t max_sg_size = sg.get_max_local_range()[0]; + +// auto engine = mkl_rng_dev::mrg32k3a(seed, n * global_id); +// mkl_rng_dev::gaussian distr(mean, stddev); + +// if (enable_sg_load) { +// const size_t base = items_per_wi * vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); + +// if ((sg_size == max_sg_size) && (base + items_per_wi * vec_sz * sg_size < n)) { +// #pragma unroll +// for (std::uint16_t it = 0; it < items_per_wi * vec_sz; it += vec_sz) { +// size_t offset = base + static_cast(it) * static_cast(sg_size); +// auto out_multi_ptr = sycl::address_space_cast(&out[offset]); + +// sycl::vec rng_val_vec = mkl_rng_dev::generate(distr, engine); +// sg.store(out_multi_ptr, rng_val_vec); +// } +// } +// else { +// for (size_t offset = base + sg.get_local_id()[0]; offset < n; offset += sg_size) { +// out[offset] = mkl_rng_dev::generate_single(distr, engine); +// } +// } +// } +// else { +// size_t base = nd_it.get_global_linear_id(); + +// base = (base / sg_size) * sg_size * items_per_wi * vec_sz + (base % sg_size); +// for (size_t offset = base; offset < std::min(n, base + sg_size * (items_per_wi * vec_sz)); offset += sg_size) +// { +// out[offset] = mkl_rng_dev::generate_single(distr, engine); +// } +// } +// }); } catch (oneapi::mkl::exception const &e) { std::stringstream error_msg; error_msg << "Unexpected MKL exception caught during gaussian call:\nreason: " << e.what(); throw std::runtime_error(error_msg.str()); + } catch (sycl::exception const &e) { + std::stringstream error_msg; + + error_msg << "Unexpected SYCL exception caught during gaussian call:\n" << e.what(); + throw std::runtime_error(error_msg.str()); + } + return distr_event; +} + +template +class gaussian_strided_kernel; + +template +sycl::event gaussian_strided_impl(sycl::queue &exec_q, + const std::uint32_t seed, + const double mean_val, + const double stddev_val, + size_t n, + int nd, + const py::ssize_t *shape_and_strides, + char *out_ptr, + py::ssize_t out_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + type_utils::validate_type_for_device(exec_q); + + DataT *out = reinterpret_cast(out_ptr); + DataT mean = static_cast(mean_val); + DataT stddev = static_cast(stddev_val); + + using IndexerT = typename dpctl::tensor::offset_utils::StridedIndexer; + IndexerT indexer{nd, out_offset, shape_and_strides}; + + sycl::event distr_event; + + try { + distr_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + cgh.depends_on(additional_depends); + + // using resTy = typename UnaryOutputType::value_type; + // using IndexerT = typename dpctl::tensor::offset_utils::StridedIndexer; + using KernelName = gaussian_strided_kernel; + + // IndexerT indexer{nd, res_offset, shape_and_strides}; + // DataT *out = reinterpret_cast(out_ptr); + + cgh.parallel_for( + {n}, + details::RngStridedFunctor(seed, mean, stddev, out, indexer)); + }); + } catch (oneapi::mkl::exception const &e) { + std::stringstream error_msg; + + error_msg << "Unexpected MKL exception caught during strided gaussian call:\nreason: " << e.what(); + throw std::runtime_error(error_msg.str()); + } catch (sycl::exception const &e) { + std::stringstream error_msg; + + error_msg << "Unexpected SYCL exception caught during strided gaussian call:\n" << e.what(); + throw std::runtime_error(error_msg.str()); } return distr_event; } @@ -181,42 +285,52 @@ std::pair gaussian(sycl::queue exec_q, return std::make_pair(sycl::event(), sycl::event()); } - // check compatibility of execution queue and allocation queue - // if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals})) { - // throw py::value_error( - // "Execution queue is not compatible with allocation queues"); - // } + // ensure that output is ample enough to accommodate all elements + auto res_offsets = res.get_minmax_offsets(); + // destination must be ample enough to accommodate all elements + { + size_t range = + static_cast(res_offsets.second - res_offsets.first); + if (range + 1 < src_nelems) { + throw py::value_error( + "Destination array can not accommodate all the elements of source array."); + } + } - // auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); - // if (overlap(eig_vecs, eig_vals)) { - // throw py::value_error("Arrays with eigenvectors and eigenvalues are " - // "overlapping segments of memory"); - // } + char *res_data = res.get_data(); + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int res_type_id = array_types.typenum_to_lookup_id(res.get_typenum()); bool is_res_c_contig = res.is_c_contiguous(); - if (!is_res_c_contig) { - throw py::value_error( - "An array with input matrix must be C-contiguous"); + if (is_res_c_contig) { + auto contig_fn = gaussian_contig_dispatch_vector[res_type_id]; + + if (contig_fn == nullptr) { + throw std::runtime_error( + "Contiguous implementation is missing for res_type_id=" + + std::to_string(res_type_id)); + } + + auto comp_ev = contig_fn(exec_q, seed, mean, stddev, n, res_data, depends); + sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {res}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); } - auto array_types = dpctl_td_ns::usm_ndarray_types(); - int res_type_id = - array_types.typenum_to_lookup_id(res.get_typenum()); - gaussian_impl_fn_ptr_t gaussian_fn = gaussian_dispatch_vector[res_type_id]; + + auto gaussian_fn = gaussian_contig_dispatch_vector[res_type_id]; if (gaussian_fn == nullptr) { throw py::value_error("No gaussian implementation defined for a required type"); } - char *res_data = res.get_data(); - - std::vector host_task_events; + // std::vector host_task_events; sycl::event gaussian_ev = - gaussian_fn(exec_q, seed, mean, stddev, n, res_data, - host_task_events, depends); + gaussian_fn(exec_q, seed, mean, stddev, n, res_data, depends); sycl::event args_ev = dpctl::utils::keep_args_alive( - exec_q, {res}, host_task_events); + exec_q, {res}/*, host_task_events*/); return std::make_pair(args_ev, gaussian_ev); } @@ -236,7 +350,21 @@ struct GaussianContigFactory fnT get() { if constexpr (GaussianTypePairSupportFactory::is_defined) { - return gaussian_impl; + return gaussian_contig_impl; + } + else { + return nullptr; + } + } +}; + +template +struct GaussianStridedFactory +{ + fnT get() + { + if constexpr (GaussianTypePairSupportFactory::is_defined) { + return gaussian_strided_impl; } else { return nullptr; @@ -246,10 +374,15 @@ struct GaussianContigFactory void init_gaussian_dispatch_vector(void) { - dpctl_td_ns::DispatchVectorBuilder contig; - contig.populate_dispatch_vector(gaussian_dispatch_vector); + contig.populate_dispatch_vector(gaussian_contig_dispatch_vector); + + dpctl_td_ns::DispatchVectorBuilder + strided; + strided.populate_dispatch_vector(gaussian_strided_dispatch_vector); } } // namespace device } // namespace rng From bc5ec396f1c817b7e83fa08d5f0d0da5d4711067 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 18 Jan 2024 16:25:33 +0100 Subject: [PATCH 03/16] Disabled strided implementation --- .../extensions/rng/device/common_impl.hpp | 76 +++--- .../extensions/rng/device/gaussian.cpp | 218 ++---------------- 2 files changed, 59 insertions(+), 235 deletions(-) diff --git a/dpnp/backend/extensions/rng/device/common_impl.hpp b/dpnp/backend/extensions/rng/device/common_impl.hpp index a59dadbeee3b..887692380582 100644 --- a/dpnp/backend/extensions/rng/device/common_impl.hpp +++ b/dpnp/backend/extensions/rng/device/common_impl.hpp @@ -31,8 +31,7 @@ #include // dpctl tensor headers -#include "kernels/alignment.hpp" -#include "utils/offset_utils.hpp" +// #include "utils/offset_utils.hpp" namespace dpnp { @@ -48,9 +47,6 @@ namespace details { namespace py = pybind11; -using dpctl::tensor::kernels::alignment_utils::is_aligned; -using dpctl::tensor::kernels::alignment_utils::required_alignment; - namespace mkl_rng_dev = oneapi::mkl::rng::device; /*! @brief Functor for unary function evaluation on contiguous array */ @@ -67,7 +63,7 @@ struct RngContigFunctor const std::uint32_t seed_; const DataT mean_; const DataT stddev_; - ResT *res_ = nullptr; + ResT * const res_ = nullptr; const size_t nelems_; public: @@ -84,10 +80,10 @@ struct RngContigFunctor const std::uint8_t sg_size = sg.get_local_range()[0]; const std::uint8_t max_sg_size = sg.get_max_local_range()[0]; - auto engine = mkl_rng_dev::mrg32k3a(seed_, nelems_ * global_id); + auto engine = mkl_rng_dev::mrg32k3a(seed_, nelems_ * global_id); // offset is questionable... mkl_rng_dev::gaussian distr(mean_, stddev_); - if (enable_sg_load) { + if constexpr (enable_sg_load) { const size_t base = items_per_wi * vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); if ((sg_size == max_sg_size) && (base + items_per_wi * vec_sz * sg_size < nelems_)) { @@ -118,38 +114,38 @@ struct RngContigFunctor } }; -template -struct RngStridedFunctor -{ -private: - const std::uint32_t seed_; - const double mean_; - const double stddev_; - ResT *res_ = nullptr; - IndexerT out_indexer_; - -public: - RngStridedFunctor(const std::uint32_t seed, const double mean, const double stddev, ResT *res_p, IndexerT out_indexer) - : seed_(seed), mean_(mean), stddev_(stddev), res_(res_p), out_indexer_(out_indexer) - { - } - - void operator()(sycl::id<1> wid) const - { - const auto res_offset = out_indexer_(wid.get(0)); - - // UnaryOpT op{}; - - auto engine = mkl_rng_dev::mrg32k3a(seed_); - mkl_rng_dev::gaussian distr(mean_, stddev_); - - res_[res_offset] = mkl_rng_dev::generate(distr, engine); - } -}; +// template +// struct RngStridedFunctor +// { +// private: +// const std::uint32_t seed_; +// const double mean_; +// const double stddev_; +// ResT *res_ = nullptr; +// IndexerT out_indexer_; + +// public: +// RngStridedFunctor(const std::uint32_t seed, const double mean, const double stddev, ResT *res_p, IndexerT out_indexer) +// : seed_(seed), mean_(mean), stddev_(stddev), res_(res_p), out_indexer_(out_indexer) +// { +// } + +// void operator()(sycl::id<1> wid) const +// { +// const auto res_offset = out_indexer_(wid.get(0)); + +// // UnaryOpT op{}; + +// auto engine = mkl_rng_dev::mrg32k3a(seed_); +// mkl_rng_dev::gaussian distr(mean_, stddev_); + +// res_[res_offset] = mkl_rng_dev::generate(distr, engine); +// } +// }; } // namespace details } // namespace device } // namespace rng diff --git a/dpnp/backend/extensions/rng/device/gaussian.cpp b/dpnp/backend/extensions/rng/device/gaussian.cpp index 382108768a18..b875b01a6270 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.cpp +++ b/dpnp/backend/extensions/rng/device/gaussian.cpp @@ -26,7 +26,7 @@ #include // dpctl tensor headers -#include "utils/memory_overlap.hpp" +// #include "utils/memory_overlap.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" @@ -57,7 +57,7 @@ namespace mkl_rng_dev = oneapi::mkl::rng::device; namespace py = pybind11; namespace type_utils = dpctl::tensor::type_utils; -typedef sycl::event (*gaussian_contig_impl_fn_ptr_t)(sycl::queue &, +typedef sycl::event (*gaussian_impl_fn_ptr_t)(sycl::queue &, const std::uint32_t, const double, const double, @@ -65,30 +65,14 @@ typedef sycl::event (*gaussian_contig_impl_fn_ptr_t)(sycl::queue &, char *, const std::vector &); - -typedef sycl::event (*gaussian_strided_impl_fn_ptr_t)(sycl::queue &, - const std::uint32_t, - const double, - const double, - const std::uint64_t size_t, - int, - const py::ssize_t *, - char *, - py::ssize_t, - const std::vector &, - const std::vector &); - -// static gaussian_impl_fn_ptr_t gaussian_dispatch_vector[dpctl_td_ns::num_types]; - -static gaussian_contig_impl_fn_ptr_t gaussian_contig_dispatch_vector[dpctl_td_ns::num_types]; -static gaussian_strided_impl_fn_ptr_t gaussian_strided_dispatch_vector[dpctl_td_ns::num_types]; +static gaussian_impl_fn_ptr_t gaussian_dispatch_vector[dpctl_td_ns::num_types]; // template template -class gaussian_contig_kernel; +class gaussian_kernel; template -static sycl::event gaussian_contig_impl(sycl::queue& exec_q, +static sycl::event gaussian_impl(sycl::queue& exec_q, const std::uint32_t seed, const double mean_val, const double stddev_val, @@ -98,7 +82,6 @@ static sycl::event gaussian_contig_impl(sycl::queue& exec_q, { type_utils::validate_type_for_device(exec_q); - // const bool enable_sg_load = is_aligned(out_ptr); DataT *out = reinterpret_cast(out_ptr); DataT mean = static_cast(mean_val); DataT stddev = static_cast(stddev_val); @@ -117,63 +100,20 @@ static sycl::event gaussian_contig_impl(sycl::queue& exec_q, if (is_aligned(out_ptr)) { constexpr bool enable_sg_load = true; - using KernelName = gaussian_contig_kernel; + using KernelName = gaussian_kernel; cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), details::RngContigFunctor(seed, mean, stddev, out, n)); } else { constexpr bool disable_sg_load = false; - using InnerKernelName = gaussian_contig_kernel; + using InnerKernelName = gaussian_kernel; using KernelName = disabled_sg_loadstore_wrapper_krn; cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), details::RngContigFunctor(seed, mean, stddev, out, n)); } }); - -// distr_event = exec_q.parallel_for>( -// sycl::nd_range<1>({global_size}, {local_size}), depends, -// [=](sycl::nd_item<1> nd_it) -// { -// auto global_id = nd_it.get_global_id(); - -// auto sg = nd_it.get_sub_group(); -// const std::uint8_t sg_size = sg.get_local_range()[0]; -// const std::uint8_t max_sg_size = sg.get_max_local_range()[0]; - -// auto engine = mkl_rng_dev::mrg32k3a(seed, n * global_id); -// mkl_rng_dev::gaussian distr(mean, stddev); - -// if (enable_sg_load) { -// const size_t base = items_per_wi * vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); - -// if ((sg_size == max_sg_size) && (base + items_per_wi * vec_sz * sg_size < n)) { -// #pragma unroll -// for (std::uint16_t it = 0; it < items_per_wi * vec_sz; it += vec_sz) { -// size_t offset = base + static_cast(it) * static_cast(sg_size); -// auto out_multi_ptr = sycl::address_space_cast(&out[offset]); - -// sycl::vec rng_val_vec = mkl_rng_dev::generate(distr, engine); -// sg.store(out_multi_ptr, rng_val_vec); -// } -// } -// else { -// for (size_t offset = base + sg.get_local_id()[0]; offset < n; offset += sg_size) { -// out[offset] = mkl_rng_dev::generate_single(distr, engine); -// } -// } -// } -// else { -// size_t base = nd_it.get_global_linear_id(); - -// base = (base / sg_size) * sg_size * items_per_wi * vec_sz + (base % sg_size); -// for (size_t offset = base; offset < std::min(n, base + sg_size * (items_per_wi * vec_sz)); offset += sg_size) -// { -// out[offset] = mkl_rng_dev::generate_single(distr, engine); -// } -// } -// }); } catch (oneapi::mkl::exception const &e) { std::stringstream error_msg; @@ -188,63 +128,6 @@ static sycl::event gaussian_contig_impl(sycl::queue& exec_q, return distr_event; } -template -class gaussian_strided_kernel; - -template -sycl::event gaussian_strided_impl(sycl::queue &exec_q, - const std::uint32_t seed, - const double mean_val, - const double stddev_val, - size_t n, - int nd, - const py::ssize_t *shape_and_strides, - char *out_ptr, - py::ssize_t out_offset, - const std::vector &depends, - const std::vector &additional_depends) -{ - type_utils::validate_type_for_device(exec_q); - - DataT *out = reinterpret_cast(out_ptr); - DataT mean = static_cast(mean_val); - DataT stddev = static_cast(stddev_val); - - using IndexerT = typename dpctl::tensor::offset_utils::StridedIndexer; - IndexerT indexer{nd, out_offset, shape_and_strides}; - - sycl::event distr_event; - - try { - distr_event = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(depends); - cgh.depends_on(additional_depends); - - // using resTy = typename UnaryOutputType::value_type; - // using IndexerT = typename dpctl::tensor::offset_utils::StridedIndexer; - using KernelName = gaussian_strided_kernel; - - // IndexerT indexer{nd, res_offset, shape_and_strides}; - // DataT *out = reinterpret_cast(out_ptr); - - cgh.parallel_for( - {n}, - details::RngStridedFunctor(seed, mean, stddev, out, indexer)); - }); - } catch (oneapi::mkl::exception const &e) { - std::stringstream error_msg; - - error_msg << "Unexpected MKL exception caught during strided gaussian call:\nreason: " << e.what(); - throw std::runtime_error(error_msg.str()); - } catch (sycl::exception const &e) { - std::stringstream error_msg; - - error_msg << "Unexpected SYCL exception caught during strided gaussian call:\n" << e.what(); - throw std::runtime_error(error_msg.str()); - } - return distr_event; -} - std::pair gaussian(sycl::queue exec_q, const std::uint32_t seed, const double mean, @@ -254,33 +137,14 @@ std::pair gaussian(sycl::queue exec_q, const std::vector &depends) { const int res_nd = res.get_ndim(); - - // if (eig_vecs_nd != 2) { - // throw py::value_error("Unexpected ndim=" + std::to_string(eig_vecs_nd) + - // " of an output array with eigenvectors"); - // } - // else if (eig_vals_nd != 1) { - // throw py::value_error("Unexpected ndim=" + std::to_string(eig_vals_nd) + - // " of an output array with eigenvalues"); - // } - const py::ssize_t *res_shape = res.get_shape_raw(); - // if (eig_vecs_shape[0] != eig_vecs_shape[1]) { - // throw py::value_error("Output array with eigenvectors with be square"); - // } - // else if (eig_vecs_shape[0] != eig_vals_shape[0]) { - // throw py::value_error( - // "Eigenvectors and eigenvalues have different shapes"); - // } - - size_t src_nelems(1); - + size_t res_nelems(1); for (int i = 0; i < res_nd; ++i) { - src_nelems *= static_cast(res_shape[i]); + res_nelems *= static_cast(res_shape[i]); } - if (src_nelems == 0) { + if (res_nelems == 0) { // nothing to do return std::make_pair(sycl::event(), sycl::event()); } @@ -291,47 +155,30 @@ std::pair gaussian(sycl::queue exec_q, { size_t range = static_cast(res_offsets.second - res_offsets.first); - if (range + 1 < src_nelems) { + if (range + 1 < res_nelems) { throw py::value_error( "Destination array can not accommodate all the elements of source array."); } } - char *res_data = res.get_data(); - - auto array_types = dpctl_td_ns::usm_ndarray_types(); - int res_type_id = array_types.typenum_to_lookup_id(res.get_typenum()); - bool is_res_c_contig = res.is_c_contiguous(); - if (is_res_c_contig) { - auto contig_fn = gaussian_contig_dispatch_vector[res_type_id]; - - if (contig_fn == nullptr) { - throw std::runtime_error( - "Contiguous implementation is missing for res_type_id=" + - std::to_string(res_type_id)); - } - - auto comp_ev = contig_fn(exec_q, seed, mean, stddev, n, res_data, depends); - sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {res}, {comp_ev}); - - return std::make_pair(ht_ev, comp_ev); + if (!is_res_c_contig) { + throw std::runtime_error("Only population of contiguous array is supported."); } + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int res_type_id = array_types.typenum_to_lookup_id(res.get_typenum()); - - auto gaussian_fn = gaussian_contig_dispatch_vector[res_type_id]; + auto gaussian_fn = gaussian_dispatch_vector[res_type_id]; if (gaussian_fn == nullptr) { throw py::value_error("No gaussian implementation defined for a required type"); } - // std::vector host_task_events; - sycl::event gaussian_ev = - gaussian_fn(exec_q, seed, mean, stddev, n, res_data, depends); + char *res_data = res.get_data(); + sycl::event gaussian_ev = gaussian_fn(exec_q, seed, mean, stddev, n, res_data, depends); - sycl::event args_ev = dpctl::utils::keep_args_alive( - exec_q, {res}/*, host_task_events*/); - return std::make_pair(args_ev, gaussian_ev); + sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {res}, {gaussian_ev}); + return std::make_pair(ht_ev, gaussian_ev); } template @@ -350,21 +197,7 @@ struct GaussianContigFactory fnT get() { if constexpr (GaussianTypePairSupportFactory::is_defined) { - return gaussian_contig_impl; - } - else { - return nullptr; - } - } -}; - -template -struct GaussianStridedFactory -{ - fnT get() - { - if constexpr (GaussianTypePairSupportFactory::is_defined) { - return gaussian_strided_impl; + return gaussian_impl; } else { return nullptr; @@ -374,15 +207,10 @@ struct GaussianStridedFactory void init_gaussian_dispatch_vector(void) { - dpctl_td_ns::DispatchVectorBuilder contig; - contig.populate_dispatch_vector(gaussian_contig_dispatch_vector); - - dpctl_td_ns::DispatchVectorBuilder - strided; - strided.populate_dispatch_vector(gaussian_strided_dispatch_vector); + contig.populate_dispatch_vector(gaussian_dispatch_vector); } } // namespace device } // namespace rng From f2d5c0140d2319bf1c87acea6f3fb3060f833d71 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 19 Jan 2024 11:36:34 +0100 Subject: [PATCH 04/16] Added destribution method dispatching --- .../extensions/rng/device/common_impl.hpp | 23 +-- .../extensions/rng/device/gaussian.cpp | 140 +++++++++++++++--- .../extensions/rng/device/gaussian.hpp | 3 +- dpnp/backend/extensions/rng/device/rng_py.cpp | 6 +- 4 files changed, 138 insertions(+), 34 deletions(-) diff --git a/dpnp/backend/extensions/rng/device/common_impl.hpp b/dpnp/backend/extensions/rng/device/common_impl.hpp index 887692380582..a8fae7ab8c3b 100644 --- a/dpnp/backend/extensions/rng/device/common_impl.hpp +++ b/dpnp/backend/extensions/rng/device/common_impl.hpp @@ -51,8 +51,8 @@ namespace mkl_rng_dev = oneapi::mkl::rng::device; /*! @brief Functor for unary function evaluation on contiguous array */ template (seed_, nelems_ * global_id); // offset is questionable... - mkl_rng_dev::gaussian distr(mean_, stddev_); + using EngineT = typename mkl_rng_dev::mrg32k3a; + auto engine = EngineT(seed_, nelems_ * global_id); // offset is questionable... + + using DistrT = typename GaussianDistrT::distr_type; + DistrT distr = distr_(); if constexpr (enable_sg_load) { const size_t base = items_per_wi * vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); @@ -92,13 +95,13 @@ struct RngContigFunctor size_t offset = base + static_cast(it) * static_cast(sg_size); auto out_multi_ptr = sycl::address_space_cast(&res_[offset]); - sycl::vec rng_val_vec = mkl_rng_dev::generate(distr, engine); + sycl::vec rng_val_vec = mkl_rng_dev::generate(distr, engine); sg.store(out_multi_ptr, rng_val_vec); } } else { for (size_t offset = base + sg.get_local_id()[0]; offset < nelems_; offset += sg_size) { - res_[offset] = mkl_rng_dev::generate_single(distr, engine); + res_[offset] = mkl_rng_dev::generate_single(distr, engine); } } } @@ -108,7 +111,7 @@ struct RngContigFunctor base = (base / sg_size) * sg_size * items_per_wi * vec_sz + (base % sg_size); for (size_t offset = base; offset < std::min(nelems_, base + sg_size * (items_per_wi * vec_sz)); offset += sg_size) { - res_[offset] = mkl_rng_dev::generate_single(distr, engine); + res_[offset] = mkl_rng_dev::generate_single(distr, engine); } } } diff --git a/dpnp/backend/extensions/rng/device/gaussian.cpp b/dpnp/backend/extensions/rng/device/gaussian.cpp index b875b01a6270..aed0623e1415 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.cpp +++ b/dpnp/backend/extensions/rng/device/gaussian.cpp @@ -57,6 +57,40 @@ namespace mkl_rng_dev = oneapi::mkl::rng::device; namespace py = pybind11; namespace type_utils = dpctl::tensor::type_utils; +constexpr int num_methods = 2; // number of methods of gaussian distribution + +// static mkl_rng_dev::gaussian_method get_method(const std::int8_t method) { +// switch (method) { +// case 0: return mkl_rng_dev::gaussian_method::by_default; +// case 1: return mkl_rng_dev::gaussian_method::by_default; +// default: +// throw py::value_error(); +// } +// } + +template +struct GaussianDistr +{ +private: + const DataT mean_; + const DataT stddev_; + +public: + using method_type = Method; + using result_type = DataT; + using distr_type = typename mkl_rng_dev::gaussian; + + GaussianDistr(const DataT mean, const DataT stddev) + : mean_(mean), stddev_(stddev) + { + } + + inline auto operator()(void) const + { + return distr_type(mean_, stddev_); + } +}; + typedef sycl::event (*gaussian_impl_fn_ptr_t)(sycl::queue &, const std::uint32_t, const double, @@ -65,13 +99,12 @@ typedef sycl::event (*gaussian_impl_fn_ptr_t)(sycl::queue &, char *, const std::vector &); -static gaussian_impl_fn_ptr_t gaussian_dispatch_vector[dpctl_td_ns::num_types]; +static gaussian_impl_fn_ptr_t gaussian_dispatch_table[dpctl_td_ns::num_types][num_methods]; -// template -template +template class gaussian_kernel; -template +template static sycl::event gaussian_impl(sycl::queue& exec_q, const std::uint32_t seed, const double mean_val, @@ -98,20 +131,23 @@ static sycl::event gaussian_impl(sycl::queue& exec_q, distr_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); + using GaussianDistrT = GaussianDistr; + GaussianDistrT distr(mean, stddev); + if (is_aligned(out_ptr)) { constexpr bool enable_sg_load = true; - using KernelName = gaussian_kernel; + using KernelName = gaussian_kernel; cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), - details::RngContigFunctor(seed, mean, stddev, out, n)); + details::RngContigFunctor(seed, distr, out, n)); } else { constexpr bool disable_sg_load = false; - using InnerKernelName = gaussian_kernel; + using InnerKernelName = gaussian_kernel; using KernelName = disabled_sg_loadstore_wrapper_krn; cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), - details::RngContigFunctor(seed, mean, stddev, out, n)); + details::RngContigFunctor(seed, distr, out, n)); } }); } catch (oneapi::mkl::exception const &e) { @@ -129,6 +165,7 @@ static sycl::event gaussian_impl(sycl::queue& exec_q, } std::pair gaussian(sycl::queue exec_q, + const std::uint8_t method_id, const std::uint32_t seed, const double mean, const double stddev, @@ -166,10 +203,14 @@ std::pair gaussian(sycl::queue exec_q, throw std::runtime_error("Only population of contiguous array is supported."); } + if (method_id >= num_methods) { + throw std::runtime_error("Unknown method=" + std::to_string(method_id) + " for gaussian distribution."); + } + auto array_types = dpctl_td_ns::usm_ndarray_types(); int res_type_id = array_types.typenum_to_lookup_id(res.get_typenum()); - auto gaussian_fn = gaussian_dispatch_vector[res_type_id]; + auto gaussian_fn = gaussian_dispatch_table[res_type_id][method_id]; if (gaussian_fn == nullptr) { throw py::value_error("No gaussian implementation defined for a required type"); } @@ -181,23 +222,84 @@ std::pair gaussian(sycl::queue exec_q, return std::make_pair(ht_ev, gaussian_ev); } -template +template typename factory, + int _num_types, + int _num_methods> +// class DispatchTableBuilder : public dpctl_td_ns::DispatchTableBuilder +class DispatchTableBuilder/* : public dpctl_td_ns::DispatchTableBuilder*/ +{ +private: + template + const std::vector row_per_method() const + { + std::vector per_method = { + factory{}.get(), + factory{}.get(), + }; + assert(per_method.size() == _num_methods); + return per_method; + } + +public: + DispatchTableBuilder() = default; + ~DispatchTableBuilder() = default; + + void populate(funcPtrT table[][_num_methods]) const + { + const auto map_by_dst_type = {row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method>(), + row_per_method>()}; + assert(map_by_dst_type.size() == _num_types); + int dst_id = 0; + for (auto &row : map_by_dst_type) { + int src_id = 0; + for (auto &fn_ptr : row) { + table[dst_id][src_id] = fn_ptr; + ++src_id; + } + ++dst_id; + } + } +}; + +template +struct TypePairDefinedEntry : std::bool_constant && + std::is_same_v> +{ + static constexpr bool is_defined = true; +}; + +template struct GaussianTypePairSupportFactory { static constexpr bool is_defined = std::disjunction< - dpctl_td_ns::TypePairDefinedEntry, - dpctl_td_ns::TypePairDefinedEntry, + TypePairDefinedEntry, + TypePairDefinedEntry, + TypePairDefinedEntry, + TypePairDefinedEntry, // fall-through dpctl_td_ns::NotDefinedEntry>::is_defined; }; -template +template struct GaussianContigFactory { fnT get() { - if constexpr (GaussianTypePairSupportFactory::is_defined) { - return gaussian_impl; + if constexpr (GaussianTypePairSupportFactory::is_defined) { + return gaussian_impl; } else { return nullptr; @@ -205,12 +307,10 @@ struct GaussianContigFactory } }; -void init_gaussian_dispatch_vector(void) +void init_gaussian_dispatch_table(void) { - dpctl_td_ns::DispatchVectorBuilder - contig; - contig.populate_dispatch_vector(gaussian_dispatch_vector); + DispatchTableBuilder contig; + contig.populate(gaussian_dispatch_table); } } // namespace device } // namespace rng diff --git a/dpnp/backend/extensions/rng/device/gaussian.hpp b/dpnp/backend/extensions/rng/device/gaussian.hpp index adbc6697ad95..880da06072f0 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.hpp +++ b/dpnp/backend/extensions/rng/device/gaussian.hpp @@ -42,6 +42,7 @@ namespace rng namespace device { extern std::pair gaussian(sycl::queue exec_q, + const std::uint8_t method_id, const std::uint32_t seed, const double mean, const double stddev, @@ -49,7 +50,7 @@ extern std::pair gaussian(sycl::queue exec_q, dpctl::tensor::usm_ndarray res, const std::vector &depends = {}); -extern void init_gaussian_dispatch_vector(void); +extern void init_gaussian_dispatch_table(void); } // namespace device } // namespace rng } // namespace ext diff --git a/dpnp/backend/extensions/rng/device/rng_py.cpp b/dpnp/backend/extensions/rng/device/rng_py.cpp index dffabee5f267..eaa506762348 100644 --- a/dpnp/backend/extensions/rng/device/rng_py.cpp +++ b/dpnp/backend/extensions/rng/device/rng_py.cpp @@ -43,13 +43,13 @@ namespace py = pybind11; // populate dispatch vectors void init_dispatch_vectors(void) { - rng_dev_ext::init_gaussian_dispatch_vector(); + // rng_dev_ext::init_gaussian_dispatch_vector(); } // populate dispatch tables void init_dispatch_tables(void) { - // lapack_ext::init_heevd_dispatch_table(); + rng_dev_ext::init_gaussian_dispatch_table(); } @@ -81,7 +81,7 @@ PYBIND11_MODULE(_rng_dev_impl, m) m.def("_gaussian", &rng_dev_ext::gaussian, "", - py::arg("sycl_queue"), py::arg("seed"), py::arg("mean"), py::arg("stddev"), + py::arg("sycl_queue"), py::arg("method"), py::arg("seed"), py::arg("mean"), py::arg("stddev"), py::arg("n"), py::arg("res"), py::arg("depends") = py::list()); } From a177e7f6a0f42f74841d218821af731b14516b8a Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 2 Feb 2024 17:31:01 +0100 Subject: [PATCH 05/16] Add pybind class for engines --- .../extensions/rng/device/common_impl.hpp | 27 ++++++---- .../extensions/rng/device/gaussian.cpp | 52 ++++++++++++++----- .../extensions/rng/device/gaussian.hpp | 52 +++++++++++++------ dpnp/backend/extensions/rng/device/rng_py.cpp | 34 +++++++++++- 4 files changed, 126 insertions(+), 39 deletions(-) diff --git a/dpnp/backend/extensions/rng/device/common_impl.hpp b/dpnp/backend/extensions/rng/device/common_impl.hpp index a8fae7ab8c3b..24f03a8e9013 100644 --- a/dpnp/backend/extensions/rng/device/common_impl.hpp +++ b/dpnp/backend/extensions/rng/device/common_impl.hpp @@ -50,42 +50,47 @@ namespace py = pybind11; namespace mkl_rng_dev = oneapi::mkl::rng::device; /*! @brief Functor for unary function evaluation on contiguous array */ -template struct RngContigFunctor { private: - const std::uint32_t seed_; + // const std::uint32_t seed_; + EngineDistrT engine_; GaussianDistrT distr_; - ResT * const res_ = nullptr; + DataT * const res_ = nullptr; const size_t nelems_; public: - RngContigFunctor(const std::uint32_t seed, GaussianDistrT& distr, ResT *res, const size_t n_elems) - : seed_(seed), distr_(distr), res_(res), nelems_(n_elems) + RngContigFunctor(EngineDistrT& engine, GaussianDistrT& distr, DataT *res, const size_t n_elems) + : engine_(engine), distr_(distr), res_(res), nelems_(n_elems) { } void operator()(sycl::nd_item<1> nd_it) const { - auto global_id = nd_it.get_global_id(); + // auto global_id = nd_it.get_global_id(); + + // constexpr std::size_t vec_sz = EngineT::vec_size; auto sg = nd_it.get_sub_group(); const std::uint8_t sg_size = sg.get_local_range()[0]; const std::uint8_t max_sg_size = sg.get_max_local_range()[0]; - using EngineT = typename mkl_rng_dev::mrg32k3a; - auto engine = EngineT(seed_, nelems_ * global_id); // offset is questionable... + // auto engine = EngineT(seed_, nelems_ * global_id); // offset is questionable... + + using EngineT = typename EngineDistrT::engine_type; + EngineT engine = engine_(); using DistrT = typename GaussianDistrT::distr_type; DistrT distr = distr_(); + constexpr std::size_t vec_sz = EngineT::vec_size; + if constexpr (enable_sg_load) { const size_t base = items_per_wi * vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); diff --git a/dpnp/backend/extensions/rng/device/gaussian.cpp b/dpnp/backend/extensions/rng/device/gaussian.cpp index aed0623e1415..497e892c8023 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.cpp +++ b/dpnp/backend/extensions/rng/device/gaussian.cpp @@ -91,7 +91,26 @@ struct GaussianDistr } }; -typedef sycl::event (*gaussian_impl_fn_ptr_t)(sycl::queue &, +template +struct EngineDistr +{ +private: + EngineBase *engine_; + +public: + using engine_type = MklEngineT; + + EngineDistr(EngineBase *engine) : engine_(engine) + { + } + + inline auto operator()(void) const + { + return MklEngineT(engine_->seed_, engine_->offset_); + } +}; + +typedef sycl::event (*gaussian_impl_fn_ptr_t)(EngineBase *engine, const std::uint32_t, const double, const double, @@ -101,11 +120,11 @@ typedef sycl::event (*gaussian_impl_fn_ptr_t)(sycl::queue &, static gaussian_impl_fn_ptr_t gaussian_dispatch_table[dpctl_td_ns::num_types][num_methods]; -template +template class gaussian_kernel; -template -static sycl::event gaussian_impl(sycl::queue& exec_q, +template +static sycl::event gaussian_impl(EngineBase *engine, const std::uint32_t seed, const double mean_val, const double stddev_val, @@ -113,13 +132,14 @@ static sycl::event gaussian_impl(sycl::queue& exec_q, char *out_ptr, const std::vector &depends) { + auto exec_q = engine->get_queue(); type_utils::validate_type_for_device(exec_q); DataT *out = reinterpret_cast(out_ptr); DataT mean = static_cast(mean_val); DataT stddev = static_cast(stddev_val); - constexpr std::size_t vec_sz = 8; + constexpr std::size_t vec_sz = EngineT::vec_size; constexpr std::size_t items_per_wi = 4; constexpr std::size_t local_size = 256; const std::size_t wg_items = local_size * vec_sz * items_per_wi; @@ -131,23 +151,28 @@ static sycl::event gaussian_impl(sycl::queue& exec_q, distr_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); + using EngineDistrT = EngineDistr; + EngineDistrT eng(static_cast(engine)); + + // EngineT engine = EngineT(seed, 0); + using GaussianDistrT = GaussianDistr; GaussianDistrT distr(mean, stddev); if (is_aligned(out_ptr)) { constexpr bool enable_sg_load = true; - using KernelName = gaussian_kernel; + using KernelName = gaussian_kernel; cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), - details::RngContigFunctor(seed, distr, out, n)); + details::RngContigFunctor(eng, distr, out, n)); } else { constexpr bool disable_sg_load = false; - using InnerKernelName = gaussian_kernel; + using InnerKernelName = gaussian_kernel; using KernelName = disabled_sg_loadstore_wrapper_krn; cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), - details::RngContigFunctor(seed, distr, out, n)); + details::RngContigFunctor(eng, distr, out, n)); } }); } catch (oneapi::mkl::exception const &e) { @@ -164,7 +189,7 @@ static sycl::event gaussian_impl(sycl::queue& exec_q, return distr_event; } -std::pair gaussian(sycl::queue exec_q, +std::pair gaussian(EngineBase *engine, const std::uint8_t method_id, const std::uint32_t seed, const double mean, @@ -173,6 +198,9 @@ std::pair gaussian(sycl::queue exec_q, dpctl::tensor::usm_ndarray res, const std::vector &depends) { + std::cout << engine->print() << std::endl; + auto exec_q = engine->get_queue(); + const int res_nd = res.get_ndim(); const py::ssize_t *res_shape = res.get_shape_raw(); @@ -216,7 +244,7 @@ std::pair gaussian(sycl::queue exec_q, } char *res_data = res.get_data(); - sycl::event gaussian_ev = gaussian_fn(exec_q, seed, mean, stddev, n, res_data, depends); + sycl::event gaussian_ev = gaussian_fn(engine, seed, mean, stddev, n, res_data, depends); sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {res}, {gaussian_ev}); return std::make_pair(ht_ev, gaussian_ev); @@ -299,7 +327,7 @@ struct GaussianContigFactory fnT get() { if constexpr (GaussianTypePairSupportFactory::is_defined) { - return gaussian_impl; + return gaussian_impl, T, M>; } else { return nullptr; diff --git a/dpnp/backend/extensions/rng/device/gaussian.hpp b/dpnp/backend/extensions/rng/device/gaussian.hpp index 880da06072f0..0d19f37cf251 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.hpp +++ b/dpnp/backend/extensions/rng/device/gaussian.hpp @@ -31,17 +31,43 @@ #include -namespace dpnp -{ -namespace backend -{ -namespace ext -{ -namespace rng -{ -namespace device +class EngineBase { +public: + virtual ~EngineBase() {} + virtual sycl::queue get_queue() = 0; + virtual std::string print() = 0; + // auto get_engine() { + // return nullptr; + // } +}; + +class MRG32k3a : public EngineBase { +public: + sycl::queue q_; + const std::uint32_t seed_; + const std::uint64_t offset_; + +// public: + MRG32k3a(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : q_(q), seed_(seed), offset_(offset) {} + + sycl::queue get_queue() override { + return q_; + } + + std::string print() override { + return "seed = " + std::to_string(seed_) + ", offset = " + std::to_string(offset_); + } + + // auto get_engine() override { + // return oneapi::mkl::rng::device::mrg32k3a<8>(seed_, offset_); + // } + + // using engine_type = oneapi::mkl::rng::device::mrg32k3a<8>; +}; + +namespace dpnp::backend::ext::rng::device { -extern std::pair gaussian(sycl::queue exec_q, +extern std::pair gaussian(EngineBase *engine, const std::uint8_t method_id, const std::uint32_t seed, const double mean, @@ -51,8 +77,4 @@ extern std::pair gaussian(sycl::queue exec_q, const std::vector &depends = {}); extern void init_gaussian_dispatch_table(void); -} // namespace device -} // namespace rng -} // namespace ext -} // namespace backend -} // namespace dpnp +} // namespace dpnp::backend::ext::rng::device diff --git a/dpnp/backend/extensions/rng/device/rng_py.cpp b/dpnp/backend/extensions/rng/device/rng_py.cpp index eaa506762348..5a6e434adb31 100644 --- a/dpnp/backend/extensions/rng/device/rng_py.cpp +++ b/dpnp/backend/extensions/rng/device/rng_py.cpp @@ -52,6 +52,29 @@ void init_dispatch_tables(void) rng_dev_ext::init_gaussian_dispatch_table(); } +class PyEngineBase : public EngineBase { +public: + /* Inherit the constructors */ + using EngineBase::EngineBase; + + /* Trampoline (need one for each virtual function) */ + sycl::queue get_queue() override { + PYBIND11_OVERRIDE_PURE( + sycl::queue, /* Return type */ + EngineBase, /* Parent class */ + get_queue, /* Name of function in C++ (must match Python name) */ + ); + } + + std::string print() override { + PYBIND11_OVERRIDE_PURE( + std::string, /* Return type */ + EngineBase, /* Parent class */ + print, /* Name of function in C++ (must match Python name) */ + ); + } +}; + PYBIND11_MODULE(_rng_dev_impl, m) { @@ -79,9 +102,18 @@ PYBIND11_MODULE(_rng_dev_impl, m) // py::arg("eig_vecs"), py::arg("eig_vals"), // py::arg("depends") = py::list()); + py::class_(m, "EngineBase") + .def(py::init<>()) + .def("print", &EngineBase::print); + + py::class_(m, "MRG32k3a") + .def(py::init()); + + m.def("_gaussian", &rng_dev_ext::gaussian, "", - py::arg("sycl_queue"), py::arg("method"), py::arg("seed"), py::arg("mean"), py::arg("stddev"), + py::arg("engine"), + py::arg("method"), py::arg("seed"), py::arg("mean"), py::arg("stddev"), py::arg("n"), py::arg("res"), py::arg("depends") = py::list()); } From b73f456329c511dea8cf86ba3c5819e321caa712 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 14 Feb 2024 19:19:58 +0100 Subject: [PATCH 06/16] Dispatching by an engine --- .../extensions/rng/device/CMakeLists.txt | 1 + .../extensions/rng/device/common_impl.hpp | 49 +---- .../rng/device/engine/base_builder.hpp | 105 +++++++++++ .../rng/device/engine/engine_base.hpp | 75 ++++++++ .../rng/device/engine/engine_builder.hpp | 38 ++++ .../rng/device/engine/mcg31m1_builder.hpp | 44 +++++ .../rng/device/engine/mcg59_builder.hpp | 44 +++++ .../rng/device/engine/mrg32k3a_builder.hpp | 44 +++++ .../rng/device/engine/mrg32k3a_engine.hpp | 61 +++++++ .../device/engine/philox4x32x10_builder.hpp | 44 +++++ .../extensions/rng/device/gaussian.cpp | 171 ++++++++---------- .../extensions/rng/device/gaussian.hpp | 48 +---- dpnp/backend/extensions/rng/device/rng_py.cpp | 49 ++--- 13 files changed, 561 insertions(+), 212 deletions(-) create mode 100644 dpnp/backend/extensions/rng/device/engine/base_builder.hpp create mode 100644 dpnp/backend/extensions/rng/device/engine/engine_base.hpp create mode 100644 dpnp/backend/extensions/rng/device/engine/engine_builder.hpp create mode 100644 dpnp/backend/extensions/rng/device/engine/mcg31m1_builder.hpp create mode 100644 dpnp/backend/extensions/rng/device/engine/mcg59_builder.hpp create mode 100644 dpnp/backend/extensions/rng/device/engine/mrg32k3a_builder.hpp create mode 100644 dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp create mode 100644 dpnp/backend/extensions/rng/device/engine/philox4x32x10_builder.hpp diff --git a/dpnp/backend/extensions/rng/device/CMakeLists.txt b/dpnp/backend/extensions/rng/device/CMakeLists.txt index 8521ade5a6c8..83e75d3feec3 100644 --- a/dpnp/backend/extensions/rng/device/CMakeLists.txt +++ b/dpnp/backend/extensions/rng/device/CMakeLists.txt @@ -40,6 +40,7 @@ endif() set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) diff --git a/dpnp/backend/extensions/rng/device/common_impl.hpp b/dpnp/backend/extensions/rng/device/common_impl.hpp index 24f03a8e9013..b44ee03952a7 100644 --- a/dpnp/backend/extensions/rng/device/common_impl.hpp +++ b/dpnp/backend/extensions/rng/device/common_impl.hpp @@ -50,7 +50,7 @@ namespace py = pybind11; namespace mkl_rng_dev = oneapi::mkl::rng::device; /*! @brief Functor for unary function evaluation on contiguous array */ -template nd_it) const { - // auto global_id = nd_it.get_global_id(); - - // constexpr std::size_t vec_sz = EngineT::vec_size; + auto global_id = nd_it.get_global_id(); auto sg = nd_it.get_sub_group(); const std::uint8_t sg_size = sg.get_local_range()[0]; const std::uint8_t max_sg_size = sg.get_max_local_range()[0]; - // auto engine = EngineT(seed_, nelems_ * global_id); // offset is questionable... - - using EngineT = typename EngineDistrT::engine_type; - EngineT engine = engine_(); + using EngineT = typename EngineBuilderT::EngineType; + EngineT engine = engine_(nelems_ * global_id); // offset is questionable... using DistrT = typename GaussianDistrT::distr_type; DistrT distr = distr_(); @@ -121,39 +117,6 @@ struct RngContigFunctor } } }; - -// template -// struct RngStridedFunctor -// { -// private: -// const std::uint32_t seed_; -// const double mean_; -// const double stddev_; -// ResT *res_ = nullptr; -// IndexerT out_indexer_; - -// public: -// RngStridedFunctor(const std::uint32_t seed, const double mean, const double stddev, ResT *res_p, IndexerT out_indexer) -// : seed_(seed), mean_(mean), stddev_(stddev), res_(res_p), out_indexer_(out_indexer) -// { -// } - -// void operator()(sycl::id<1> wid) const -// { -// const auto res_offset = out_indexer_(wid.get(0)); - -// // UnaryOpT op{}; - -// auto engine = mkl_rng_dev::mrg32k3a(seed_); -// mkl_rng_dev::gaussian distr(mean_, stddev_); - -// res_[res_offset] = mkl_rng_dev::generate(distr, engine); -// } -// }; } // namespace details } // namespace device } // namespace rng diff --git a/dpnp/backend/extensions/rng/device/engine/base_builder.hpp b/dpnp/backend/extensions/rng/device/engine/base_builder.hpp new file mode 100644 index 000000000000..8ca1e453b548 --- /dev/null +++ b/dpnp/backend/extensions/rng/device/engine/base_builder.hpp @@ -0,0 +1,105 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include "engine_base.hpp" + + +namespace dpnp::backend::ext::rng::device::engine +{ +template +class BaseBuilder { +private: + static constexpr std::uint8_t max_n = 10; + + std::uint8_t no_of_seeds; + std::uint8_t no_of_offsets; + + std::array seeds{}; + std::array offsets{}; + +public: + BaseBuilder(EngineBase *engine) + { + auto seed_values = engine->get_seeds(); + no_of_seeds = seed_values.size(); + if (no_of_seeds > max_n) { + throw std::runtime_error(""); + } + + // TODO: implement a caster + for (std::uint16_t i = 0; i < no_of_seeds; i++) { + seeds[i] = static_cast(seed_values[i]); + } + + auto offset_values = engine->get_offsets(); + no_of_offsets = offset_values.size(); + if (no_of_offsets > max_n) { + throw std::runtime_error(""); + } + + // TODO: implement a caster + for (std::uint16_t i = 0; i < no_of_seeds; i++) { + offsets[i] = static_cast(offset_values[i]); + } + } + + inline auto operator()() const + { + switch (no_of_seeds) { + case 1: { + return EngineT({seeds[0]}, {offsets[0]}); + } + // TODO: implement full switch + default: + break; + } + return EngineT(); + } + + inline auto operator()(OffsetT offset) const + { + switch (no_of_seeds) { + case 1: { + return EngineT({seeds[0]}, offset); + } + // TODO: implement full switch + default: + break; + } + return EngineT(); + } + + // TODO: remove + void print() { + std::cout << "list_of_seeds: "; + for (auto &val: seeds) { + std::cout << std::to_string(val) << ", "; + } + std::cout << std::endl; + } +}; +} // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/engine_base.hpp b/dpnp/backend/extensions/rng/device/engine/engine_base.hpp new file mode 100644 index 000000000000..1e85e22ebbe4 --- /dev/null +++ b/dpnp/backend/extensions/rng/device/engine/engine_base.hpp @@ -0,0 +1,75 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + + +namespace dpnp::backend::ext::rng::device::engine +{ +class EngineType { +public: + enum Type : std::uint8_t { + MRG32k3a = 0, + Base, // must be the last always + }; + + EngineType() = default; + constexpr EngineType(Type type) : type_(type) {} + + constexpr std::uint8_t id() const { + return static_cast(type_); + } + + static constexpr std::uint8_t base_id() { + return EngineType(Base).id(); + } + +private: + Type type_; +}; + +// A total number of supported engines == EngineType::Base +constexpr int no_of_engines = EngineType::base_id(); + +class EngineBase { +public: + virtual ~EngineBase() {} + virtual sycl::queue &get_queue() = 0; + + virtual EngineType get_type() const noexcept { + return EngineType::Base; + } + + virtual std::vector get_seeds() const noexcept { + return std::vector(); + } + + virtual std::vector get_offsets() const noexcept { + return std::vector(); + } +}; +} // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/engine_builder.hpp b/dpnp/backend/extensions/rng/device/engine/engine_builder.hpp new file mode 100644 index 000000000000..78deeedbca69 --- /dev/null +++ b/dpnp/backend/extensions/rng/device/engine/engine_builder.hpp @@ -0,0 +1,38 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + + +namespace dpnp::backend::ext::rng::device::engine +{ +template +class Builder {}; +} // dpnp::backend::ext::rng::device::engine + +#include "mrg32k3a_builder.hpp" +#include "philox4x32x10_builder.hpp" +#include "mcg31m1_builder.hpp" +#include "mcg59_builder.hpp" diff --git a/dpnp/backend/extensions/rng/device/engine/mcg31m1_builder.hpp b/dpnp/backend/extensions/rng/device/engine/mcg31m1_builder.hpp new file mode 100644 index 000000000000..24a68c8cdf03 --- /dev/null +++ b/dpnp/backend/extensions/rng/device/engine/mcg31m1_builder.hpp @@ -0,0 +1,44 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "engine_base.hpp" +#include "base_builder.hpp" + +namespace dpnp::backend::ext::rng::device::engine +{ +namespace mkl_rng_dev = oneapi::mkl::rng::device; + +template +class Builder> : public BaseBuilder, std::uint32_t, std::uint64_t> { +public: + using EngineType = mkl_rng_dev::mcg31m1; + + Builder(EngineBase *engine) : BaseBuilder(engine) {} +}; +} // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/mcg59_builder.hpp b/dpnp/backend/extensions/rng/device/engine/mcg59_builder.hpp new file mode 100644 index 000000000000..a16e3aeaf4ee --- /dev/null +++ b/dpnp/backend/extensions/rng/device/engine/mcg59_builder.hpp @@ -0,0 +1,44 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "engine_base.hpp" +#include "base_builder.hpp" + +namespace dpnp::backend::ext::rng::device::engine +{ +namespace mkl_rng_dev = oneapi::mkl::rng::device; + +template +class Builder> : public BaseBuilder, std::uint32_t, std::uint64_t> { +public: + using EngineType = mkl_rng_dev::mcg59; + + Builder(EngineBase *engine) : BaseBuilder(engine) {} +}; +} // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/mrg32k3a_builder.hpp b/dpnp/backend/extensions/rng/device/engine/mrg32k3a_builder.hpp new file mode 100644 index 000000000000..e7ef86cd3bab --- /dev/null +++ b/dpnp/backend/extensions/rng/device/engine/mrg32k3a_builder.hpp @@ -0,0 +1,44 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "engine_base.hpp" +#include "base_builder.hpp" + +namespace dpnp::backend::ext::rng::device::engine +{ +namespace mkl_rng_dev = oneapi::mkl::rng::device; + +template +class Builder> : public BaseBuilder, std::uint32_t, std::uint64_t> { +public: + using EngineType = mkl_rng_dev::mrg32k3a; + + Builder(EngineBase *engine) : BaseBuilder(engine) {} +}; +} // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp b/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp new file mode 100644 index 000000000000..413ff43540a5 --- /dev/null +++ b/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp @@ -0,0 +1,61 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include "engine_base.hpp" + + +namespace dpnp::backend::ext::rng::device::engine +{ +class MRG32k3a : public EngineBase { +private: + sycl::queue q_; + std::vector seed_vec; + std::vector offset_vec; + +public: + MRG32k3a(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : q_(q) { + seed_vec.push_back(seed); + offset_vec.push_back(offset); + } + + sycl::queue &get_queue() override { + return q_; + } + + virtual EngineType get_type() const noexcept override { + return EngineType::MRG32k3a; + } + + virtual std::vector get_seeds() const noexcept override { + return seed_vec; + } + + virtual std::vector get_offsets() const noexcept override { + return offset_vec; + } +}; +} // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/philox4x32x10_builder.hpp b/dpnp/backend/extensions/rng/device/engine/philox4x32x10_builder.hpp new file mode 100644 index 000000000000..7186ae7c3a3b --- /dev/null +++ b/dpnp/backend/extensions/rng/device/engine/philox4x32x10_builder.hpp @@ -0,0 +1,44 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "engine_base.hpp" +#include "base_builder.hpp" + +namespace dpnp::backend::ext::rng::device::engine +{ +namespace mkl_rng_dev = oneapi::mkl::rng::device; + +template +class Builder> : public BaseBuilder, std::uint64_t, std::uint64_t> { +public: + using EngineType = mkl_rng_dev::philox4x32x10; + + Builder(EngineBase *engine) : BaseBuilder(engine) {} +}; +} // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/gaussian.cpp b/dpnp/backend/extensions/rng/device/gaussian.cpp index 497e892c8023..7788b5dd2771 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.cpp +++ b/dpnp/backend/extensions/rng/device/gaussian.cpp @@ -40,6 +40,9 @@ using dpctl::tensor::kernels::alignment_utils::required_alignment; #include "common_impl.hpp" #include "gaussian.hpp" +#include "engine/engine_base.hpp" +#include "engine/engine_builder.hpp" + // #include "dpnp_utils.hpp" namespace dpnp @@ -57,16 +60,7 @@ namespace mkl_rng_dev = oneapi::mkl::rng::device; namespace py = pybind11; namespace type_utils = dpctl::tensor::type_utils; -constexpr int num_methods = 2; // number of methods of gaussian distribution - -// static mkl_rng_dev::gaussian_method get_method(const std::int8_t method) { -// switch (method) { -// case 0: return mkl_rng_dev::gaussian_method::by_default; -// case 1: return mkl_rng_dev::gaussian_method::by_default; -// default: -// throw py::value_error(); -// } -// } +constexpr int no_of_methods = 2; // number of methods of gaussian distribution template struct GaussianDistr @@ -91,48 +85,27 @@ struct GaussianDistr } }; -template -struct EngineDistr -{ -private: - EngineBase *engine_; - -public: - using engine_type = MklEngineT; - - EngineDistr(EngineBase *engine) : engine_(engine) - { - } - - inline auto operator()(void) const - { - return MklEngineT(engine_->seed_, engine_->offset_); - } -}; - -typedef sycl::event (*gaussian_impl_fn_ptr_t)(EngineBase *engine, - const std::uint32_t, - const double, - const double, - const std::uint64_t, - char *, - const std::vector &); +typedef sycl::event (*gaussian_impl_fn_ptr_t)(engine::EngineBase *engine, + const double, + const double, + const std::uint64_t, + char *, + const std::vector &); -static gaussian_impl_fn_ptr_t gaussian_dispatch_table[dpctl_td_ns::num_types][num_methods]; +static gaussian_impl_fn_ptr_t gaussian_dispatch_table[engine::no_of_engines][dpctl_td_ns::num_types][no_of_methods]; template class gaussian_kernel; template -static sycl::event gaussian_impl(EngineBase *engine, - const std::uint32_t seed, +static sycl::event gaussian_impl(engine::EngineBase *engine, const double mean_val, const double stddev_val, const std::uint64_t n, char *out_ptr, const std::vector &depends) { - auto exec_q = engine->get_queue(); + auto &exec_q = engine->get_queue(); type_utils::validate_type_for_device(exec_q); DataT *out = reinterpret_cast(out_ptr); @@ -146,15 +119,14 @@ static sycl::event gaussian_impl(EngineBase *engine, const std::size_t global_size = ((n + wg_items - 1) / (wg_items)) * local_size; sycl::event distr_event; - + try { distr_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); - using EngineDistrT = EngineDistr; - EngineDistrT eng(static_cast(engine)); - - // EngineT engine = EngineT(seed, 0); + using EngineBuilderT = engine::Builder; + EngineBuilderT eng_builder(engine); + eng_builder.print(); // TODO: remove using GaussianDistrT = GaussianDistr; GaussianDistrT distr(mean, stddev); @@ -164,7 +136,7 @@ static sycl::event gaussian_impl(EngineBase *engine, using KernelName = gaussian_kernel; cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), - details::RngContigFunctor(eng, distr, out, n)); + details::RngContigFunctor(eng_builder, distr, out, n)); } else { constexpr bool disable_sg_load = false; @@ -172,7 +144,7 @@ static sycl::event gaussian_impl(EngineBase *engine, using KernelName = disabled_sg_loadstore_wrapper_krn; cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), - details::RngContigFunctor(eng, distr, out, n)); + details::RngContigFunctor(eng_builder, distr, out, n)); } }); } catch (oneapi::mkl::exception const &e) { @@ -189,17 +161,15 @@ static sycl::event gaussian_impl(EngineBase *engine, return distr_event; } -std::pair gaussian(EngineBase *engine, +std::pair gaussian(engine::EngineBase *engine, const std::uint8_t method_id, - const std::uint32_t seed, const double mean, const double stddev, const std::uint64_t n, dpctl::tensor::usm_ndarray res, const std::vector &depends) { - std::cout << engine->print() << std::endl; - auto exec_q = engine->get_queue(); + auto &exec_q = engine->get_queue(); const int res_nd = res.get_ndim(); const py::ssize_t *res_shape = res.get_shape_raw(); @@ -231,73 +201,92 @@ std::pair gaussian(EngineBase *engine, throw std::runtime_error("Only population of contiguous array is supported."); } - if (method_id >= num_methods) { + auto enginge_id = engine->get_type().id(); + if (enginge_id >= engine::no_of_engines) { + throw std::runtime_error("Unknown engine type=" + std::to_string(enginge_id) + " for gaussian distribution."); + } + + if (method_id >= no_of_methods) { throw std::runtime_error("Unknown method=" + std::to_string(method_id) + " for gaussian distribution."); } auto array_types = dpctl_td_ns::usm_ndarray_types(); int res_type_id = array_types.typenum_to_lookup_id(res.get_typenum()); - auto gaussian_fn = gaussian_dispatch_table[res_type_id][method_id]; + auto gaussian_fn = gaussian_dispatch_table[enginge_id][res_type_id][method_id]; if (gaussian_fn == nullptr) { throw py::value_error("No gaussian implementation defined for a required type"); } char *res_data = res.get_data(); - sycl::event gaussian_ev = gaussian_fn(engine, seed, mean, stddev, n, res_data, depends); + sycl::event gaussian_ev = gaussian_fn(engine, mean, stddev, n, res_data, depends); sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {res}, {gaussian_ev}); return std::make_pair(ht_ev, gaussian_ev); } template typename factory, - int _num_types, - int _num_methods> -// class DispatchTableBuilder : public dpctl_td_ns::DispatchTableBuilder -class DispatchTableBuilder/* : public dpctl_td_ns::DispatchTableBuilder*/ + template typename factory, + int _no_of_engines, + int _no_of_types, + int _no_of_methods> +class Dispatch3DTableBuilder { private: - template + template const std::vector row_per_method() const { std::vector per_method = { - factory{}.get(), - factory{}.get(), + factory{}.get(), + factory{}.get(), }; - assert(per_method.size() == _num_methods); + assert(per_method.size() == _no_of_methods); return per_method; } + template + auto table_per_type_and_method() const + { + std::vector> + table_by_type = {row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method>(), + row_per_method>()}; + assert(table_by_type.size() == _no_of_types); + return table_by_type; + } + public: - DispatchTableBuilder() = default; - ~DispatchTableBuilder() = default; + Dispatch3DTableBuilder() = default; + ~Dispatch3DTableBuilder() = default; - void populate(funcPtrT table[][_num_methods]) const + void populate(funcPtrT table[][_no_of_types][_no_of_methods]) const { - const auto map_by_dst_type = {row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method>(), - row_per_method>()}; - assert(map_by_dst_type.size() == _num_types); - int dst_id = 0; - for (auto &row : map_by_dst_type) { - int src_id = 0; - for (auto &fn_ptr : row) { - table[dst_id][src_id] = fn_ptr; - ++src_id; + const auto map_by_engine = {table_per_type_and_method>()}; + assert(map_by_engine.size() == _no_of_engines); + + std::uint16_t engine_id = 0; + for (auto &table_by_type : map_by_engine) { + std::uint16_t type_id = 0; + for (auto &row_by_method : table_by_type) { + std::uint16_t method_id = 0; + for (auto &fn_ptr : row_by_method) { + table[engine_id][type_id][method_id] = fn_ptr; + ++method_id; + } + ++type_id; } - ++dst_id; + ++engine_id; } } }; @@ -321,13 +310,13 @@ struct GaussianTypePairSupportFactory dpctl_td_ns::NotDefinedEntry>::is_defined; }; -template +template struct GaussianContigFactory { fnT get() { if constexpr (GaussianTypePairSupportFactory::is_defined) { - return gaussian_impl, T, M>; + return gaussian_impl; } else { return nullptr; @@ -337,7 +326,7 @@ struct GaussianContigFactory void init_gaussian_dispatch_table(void) { - DispatchTableBuilder contig; + Dispatch3DTableBuilder contig; contig.populate(gaussian_dispatch_table); } } // namespace device diff --git a/dpnp/backend/extensions/rng/device/gaussian.hpp b/dpnp/backend/extensions/rng/device/gaussian.hpp index 0d19f37cf251..6c7baee6c1af 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.hpp +++ b/dpnp/backend/extensions/rng/device/gaussian.hpp @@ -31,50 +31,18 @@ #include -class EngineBase { -public: - virtual ~EngineBase() {} - virtual sycl::queue get_queue() = 0; - virtual std::string print() = 0; - // auto get_engine() { - // return nullptr; - // } -}; +#include "engine/engine_base.hpp" -class MRG32k3a : public EngineBase { -public: - sycl::queue q_; - const std::uint32_t seed_; - const std::uint64_t offset_; - -// public: - MRG32k3a(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : q_(q), seed_(seed), offset_(offset) {} - - sycl::queue get_queue() override { - return q_; - } - - std::string print() override { - return "seed = " + std::to_string(seed_) + ", offset = " + std::to_string(offset_); - } - - // auto get_engine() override { - // return oneapi::mkl::rng::device::mrg32k3a<8>(seed_, offset_); - // } - - // using engine_type = oneapi::mkl::rng::device::mrg32k3a<8>; -}; namespace dpnp::backend::ext::rng::device { -extern std::pair gaussian(EngineBase *engine, - const std::uint8_t method_id, - const std::uint32_t seed, - const double mean, - const double stddev, - const std::uint64_t n, - dpctl::tensor::usm_ndarray res, - const std::vector &depends = {}); +extern std::pair gaussian(engine::EngineBase *engine, + const std::uint8_t method_id, + const double mean, + const double stddev, + const std::uint64_t n, + dpctl::tensor::usm_ndarray res, + const std::vector &depends = {}); extern void init_gaussian_dispatch_table(void); } // namespace dpnp::backend::ext::rng::device diff --git a/dpnp/backend/extensions/rng/device/rng_py.cpp b/dpnp/backend/extensions/rng/device/rng_py.cpp index 5a6e434adb31..3cc418fb495f 100644 --- a/dpnp/backend/extensions/rng/device/rng_py.cpp +++ b/dpnp/backend/extensions/rng/device/rng_py.cpp @@ -35,9 +35,11 @@ #include #include "gaussian.hpp" +#include "engine/mrg32k3a_engine.hpp" namespace mkl_rng = oneapi::mkl::rng; namespace rng_dev_ext = dpnp::backend::ext::rng::device; +namespace rng_dev_engine = dpnp::backend::ext::rng::device::engine; namespace py = pybind11; // populate dispatch vectors @@ -52,25 +54,17 @@ void init_dispatch_tables(void) rng_dev_ext::init_gaussian_dispatch_table(); } -class PyEngineBase : public EngineBase { +class PyEngineBase : public rng_dev_engine::EngineBase { public: /* Inherit the constructors */ - using EngineBase::EngineBase; + using rng_dev_engine::EngineBase::EngineBase; /* Trampoline (need one for each virtual function) */ - sycl::queue get_queue() override { + sycl::queue &get_queue() override { PYBIND11_OVERRIDE_PURE( - sycl::queue, /* Return type */ + sycl::queue&, /* Return type */ EngineBase, /* Parent class */ - get_queue, /* Name of function in C++ (must match Python name) */ - ); - } - - std::string print() override { - PYBIND11_OVERRIDE_PURE( - std::string, /* Return type */ - EngineBase, /* Parent class */ - print, /* Name of function in C++ (must match Python name) */ + get_queue, /* Name of function in C++ (must match Python name) */ ); } }; @@ -78,42 +72,21 @@ class PyEngineBase : public EngineBase { PYBIND11_MODULE(_rng_dev_impl, m) { - // using engine_base_t = rng_ext::EngineBase; - // py::class_ engine_base(m, "EngineBase"); - // engine_base.def(py::init()) - // .def("get_queue", &engine_base_t::get_queue); - - // using mt19937_engine_t = rng_ext::EngineProxy; - // py::class_(m, "mt19937", engine_base) - // .def(py::init()) - // .def(py::init>()); - - // using mcg59_engine_t = rng_ext::EngineProxy; - // py::class_(m, "mcg59", engine_base) - // .def(py::init()); - init_dispatch_vectors(); init_dispatch_tables(); - // m.def("_heevd", &lapack_ext::heevd, - // "Call `heevd` from OneMKL LAPACK library to return " - // "the eigenvalues and eigenvectors of a complex Hermitian matrix", - // py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"), - // py::arg("eig_vecs"), py::arg("eig_vals"), - // py::arg("depends") = py::list()); - - py::class_(m, "EngineBase") + py::class_(m, "EngineBase") .def(py::init<>()) - .def("print", &EngineBase::print); + .def("get_queue", &rng_dev_engine::EngineBase::get_queue); - py::class_(m, "MRG32k3a") + py::class_(m, "MRG32k3a") .def(py::init()); m.def("_gaussian", &rng_dev_ext::gaussian, "", py::arg("engine"), - py::arg("method"), py::arg("seed"), py::arg("mean"), py::arg("stddev"), + py::arg("method"), py::arg("mean"), py::arg("stddev"), py::arg("n"), py::arg("res"), py::arg("depends") = py::list()); } From 0fda09491f692edd26275c9e5bd2a0e49faaed63 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 14 Feb 2024 21:03:52 +0100 Subject: [PATCH 07/16] Decoupling dispatching functionality --- .../extensions/rng/device/common_impl.hpp | 14 +-- .../extensions/rng/device/dispatch/matrix.hpp | 56 +++++++++ .../rng/device/dispatch/table_builder.hpp | 100 +++++++++++++++ .../rng/device/engine/base_builder.hpp | 2 +- .../extensions/rng/device/gaussian.cpp | 116 +++--------------- 5 files changed, 180 insertions(+), 108 deletions(-) create mode 100644 dpnp/backend/extensions/rng/device/dispatch/matrix.hpp create mode 100644 dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp diff --git a/dpnp/backend/extensions/rng/device/common_impl.hpp b/dpnp/backend/extensions/rng/device/common_impl.hpp index b44ee03952a7..c2f573a233ba 100644 --- a/dpnp/backend/extensions/rng/device/common_impl.hpp +++ b/dpnp/backend/extensions/rng/device/common_impl.hpp @@ -49,24 +49,22 @@ namespace py = pybind11; namespace mkl_rng_dev = oneapi::mkl::rng::device; -/*! @brief Functor for unary function evaluation on contiguous array */ template struct RngContigFunctor { private: - // const std::uint32_t seed_; + using DataT = typename DistributorBuilderT::result_type; + EngineBuilderT engine_; - GaussianDistrT distr_; + DistributorBuilderT distr_; DataT * const res_ = nullptr; const size_t nelems_; public: - - RngContigFunctor(EngineBuilderT& engine, GaussianDistrT& distr, DataT *res, const size_t n_elems) + RngContigFunctor(EngineBuilderT& engine, DistributorBuilderT& distr, DataT *res, const size_t n_elems) : engine_(engine), distr_(distr), res_(res), nelems_(n_elems) { } @@ -82,7 +80,7 @@ struct RngContigFunctor using EngineT = typename EngineBuilderT::EngineType; EngineT engine = engine_(nelems_ * global_id); // offset is questionable... - using DistrT = typename GaussianDistrT::distr_type; + using DistrT = typename DistributorBuilderT::distr_type; DistrT distr = distr_(); constexpr std::size_t vec_sz = EngineT::vec_size; diff --git a/dpnp/backend/extensions/rng/device/dispatch/matrix.hpp b/dpnp/backend/extensions/rng/device/dispatch/matrix.hpp new file mode 100644 index 000000000000..d2e5b42fbc25 --- /dev/null +++ b/dpnp/backend/extensions/rng/device/dispatch/matrix.hpp @@ -0,0 +1,56 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "utils/type_dispatch.hpp" + + +namespace dpnp::backend::ext::rng::device::dispatch +{ +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +namespace mkl_rng_dev = oneapi::mkl::rng::device; + +template +struct TypePairDefinedEntry : std::bool_constant && + std::is_same_v> +{ + static constexpr bool is_defined = true; +}; + +template +struct GaussianTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + TypePairDefinedEntry, + TypePairDefinedEntry, + TypePairDefinedEntry, + TypePairDefinedEntry, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; +} // dpnp::backend::ext::rng::device::dispatch diff --git a/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp b/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp new file mode 100644 index 000000000000..247f968e3196 --- /dev/null +++ b/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp @@ -0,0 +1,100 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + + +namespace dpnp::backend::ext::rng::device::dispatch +{ +namespace mkl_rng_dev = oneapi::mkl::rng::device; + +template typename factory, + int _no_of_engines, + int _no_of_types, + int _no_of_methods> +class Dispatch3DTableBuilder +{ +private: + template + const std::vector row_per_method() const + { + std::vector per_method = { + factory{}.get(), + factory{}.get(), + }; + assert(per_method.size() == _no_of_methods); + return per_method; + } + + template + auto table_per_type_and_method() const + { + std::vector> + table_by_type = {row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method>(), + row_per_method>()}; + assert(table_by_type.size() == _no_of_types); + return table_by_type; + } + +public: + Dispatch3DTableBuilder() = default; + ~Dispatch3DTableBuilder() = default; + + void populate(funcPtrT table[][_no_of_types][_no_of_methods]) const + { + const auto map_by_engine = {table_per_type_and_method>()}; + assert(map_by_engine.size() == _no_of_engines); + + std::uint16_t engine_id = 0; + for (auto &table_by_type : map_by_engine) { + std::uint16_t type_id = 0; + for (auto &row_by_method : table_by_type) { + std::uint16_t method_id = 0; + for (auto &fn_ptr : row_by_method) { + table[engine_id][type_id][method_id] = fn_ptr; + ++method_id; + } + ++type_id; + } + ++engine_id; + } + } +}; +} // dpnp::backend::ext::rng::device::dispatch diff --git a/dpnp/backend/extensions/rng/device/engine/base_builder.hpp b/dpnp/backend/extensions/rng/device/engine/base_builder.hpp index 8ca1e453b548..2ce7230ae471 100644 --- a/dpnp/backend/extensions/rng/device/engine/base_builder.hpp +++ b/dpnp/backend/extensions/rng/device/engine/base_builder.hpp @@ -67,7 +67,7 @@ class BaseBuilder { } } - inline auto operator()() const + inline auto operator()(void) const { switch (no_of_seeds) { case 1: { diff --git a/dpnp/backend/extensions/rng/device/gaussian.cpp b/dpnp/backend/extensions/rng/device/gaussian.cpp index 7788b5dd2771..284ad4ff86db 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.cpp +++ b/dpnp/backend/extensions/rng/device/gaussian.cpp @@ -33,17 +33,15 @@ // dpctl tensor headers #include "kernels/alignment.hpp" -using dpctl::tensor::kernels::alignment_utils::disabled_sg_loadstore_wrapper_krn; -using dpctl::tensor::kernels::alignment_utils::is_aligned; -using dpctl::tensor::kernels::alignment_utils::required_alignment; - #include "common_impl.hpp" #include "gaussian.hpp" #include "engine/engine_base.hpp" #include "engine/engine_builder.hpp" -// #include "dpnp_utils.hpp" +#include "dispatch/matrix.hpp" +#include "dispatch/table_builder.hpp" + namespace dpnp { @@ -55,26 +53,31 @@ namespace rng { namespace device { +namespace dpctl_krn_ns = dpctl::tensor::kernels::alignment_utils; namespace dpctl_td_ns = dpctl::tensor::type_dispatch; namespace mkl_rng_dev = oneapi::mkl::rng::device; namespace py = pybind11; namespace type_utils = dpctl::tensor::type_utils; +using dpctl_krn_ns::disabled_sg_loadstore_wrapper_krn; +using dpctl_krn_ns::is_aligned; +using dpctl_krn_ns::required_alignment; + constexpr int no_of_methods = 2; // number of methods of gaussian distribution template -struct GaussianDistr +struct DistributorBuilder { private: const DataT mean_; const DataT stddev_; public: - using method_type = Method; using result_type = DataT; + using method_type = Method; using distr_type = typename mkl_rng_dev::gaussian; - GaussianDistr(const DataT mean, const DataT stddev) + DistributorBuilder(const DataT mean, const DataT stddev) : mean_(mean), stddev_(stddev) { } @@ -128,15 +131,15 @@ static sycl::event gaussian_impl(engine::EngineBase *engine, EngineBuilderT eng_builder(engine); eng_builder.print(); // TODO: remove - using GaussianDistrT = GaussianDistr; - GaussianDistrT distr(mean, stddev); + using DistributorBuilderT = DistributorBuilder; + DistributorBuilderT dist_builder(mean, stddev); if (is_aligned(out_ptr)) { constexpr bool enable_sg_load = true; using KernelName = gaussian_kernel; cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), - details::RngContigFunctor(eng_builder, distr, out, n)); + details::RngContigFunctor(eng_builder, dist_builder, out, n)); } else { constexpr bool disable_sg_load = false; @@ -144,7 +147,7 @@ static sycl::event gaussian_impl(engine::EngineBase *engine, using KernelName = disabled_sg_loadstore_wrapper_krn; cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), - details::RngContigFunctor(eng_builder, distr, out, n)); + details::RngContigFunctor(eng_builder, dist_builder, out, n)); } }); } catch (oneapi::mkl::exception const &e) { @@ -225,97 +228,12 @@ std::pair gaussian(engine::EngineBase *engine, return std::make_pair(ht_ev, gaussian_ev); } -template typename factory, - int _no_of_engines, - int _no_of_types, - int _no_of_methods> -class Dispatch3DTableBuilder -{ -private: - template - const std::vector row_per_method() const - { - std::vector per_method = { - factory{}.get(), - factory{}.get(), - }; - assert(per_method.size() == _no_of_methods); - return per_method; - } - - template - auto table_per_type_and_method() const - { - std::vector> - table_by_type = {row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method>(), - row_per_method>()}; - assert(table_by_type.size() == _no_of_types); - return table_by_type; - } - -public: - Dispatch3DTableBuilder() = default; - ~Dispatch3DTableBuilder() = default; - - void populate(funcPtrT table[][_no_of_types][_no_of_methods]) const - { - const auto map_by_engine = {table_per_type_and_method>()}; - assert(map_by_engine.size() == _no_of_engines); - - std::uint16_t engine_id = 0; - for (auto &table_by_type : map_by_engine) { - std::uint16_t type_id = 0; - for (auto &row_by_method : table_by_type) { - std::uint16_t method_id = 0; - for (auto &fn_ptr : row_by_method) { - table[engine_id][type_id][method_id] = fn_ptr; - ++method_id; - } - ++type_id; - } - ++engine_id; - } - } -}; - -template -struct TypePairDefinedEntry : std::bool_constant && - std::is_same_v> -{ - static constexpr bool is_defined = true; -}; - -template -struct GaussianTypePairSupportFactory -{ - static constexpr bool is_defined = std::disjunction< - TypePairDefinedEntry, - TypePairDefinedEntry, - TypePairDefinedEntry, - TypePairDefinedEntry, - // fall-through - dpctl_td_ns::NotDefinedEntry>::is_defined; -}; - template struct GaussianContigFactory { fnT get() { - if constexpr (GaussianTypePairSupportFactory::is_defined) { + if constexpr (dispatch::GaussianTypePairSupportFactory::is_defined) { return gaussian_impl; } else { @@ -326,7 +244,7 @@ struct GaussianContigFactory void init_gaussian_dispatch_table(void) { - Dispatch3DTableBuilder contig; + dispatch::Dispatch3DTableBuilder contig; contig.populate(gaussian_dispatch_table); } } // namespace device From a9a0f36fb36fe934311f161988b40e5d3c6e3458 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 15 Feb 2024 11:11:35 +0100 Subject: [PATCH 08/16] Move engine builder to separate folder --- dpnp/backend/extensions/rng/device/CMakeLists.txt | 1 + .../engine/{base_builder.hpp => builder/base.hpp} | 4 ++-- .../{engine_builder.hpp => builder/builder.hpp} | 12 ++++++------ .../{mcg31m1_builder.hpp => builder/mcg31m1.hpp} | 6 +++--- .../engine/{mcg59_builder.hpp => builder/mcg59.hpp} | 6 +++--- .../{mrg32k3a_builder.hpp => builder/mrg32k3a.hpp} | 6 +++--- .../philox4x32x10.hpp} | 6 +++--- dpnp/backend/extensions/rng/device/gaussian.cpp | 4 ++-- 8 files changed, 23 insertions(+), 22 deletions(-) rename dpnp/backend/extensions/rng/device/engine/{base_builder.hpp => builder/base.hpp} (96%) rename dpnp/backend/extensions/rng/device/engine/{engine_builder.hpp => builder/builder.hpp} (87%) rename dpnp/backend/extensions/rng/device/engine/{mcg31m1_builder.hpp => builder/mcg31m1.hpp} (93%) rename dpnp/backend/extensions/rng/device/engine/{mcg59_builder.hpp => builder/mcg59.hpp} (93%) rename dpnp/backend/extensions/rng/device/engine/{mrg32k3a_builder.hpp => builder/mrg32k3a.hpp} (93%) rename dpnp/backend/extensions/rng/device/engine/{philox4x32x10_builder.hpp => builder/philox4x32x10.hpp} (93%) diff --git a/dpnp/backend/extensions/rng/device/CMakeLists.txt b/dpnp/backend/extensions/rng/device/CMakeLists.txt index 83e75d3feec3..c8ea8e8c9071 100644 --- a/dpnp/backend/extensions/rng/device/CMakeLists.txt +++ b/dpnp/backend/extensions/rng/device/CMakeLists.txt @@ -41,6 +41,7 @@ endif() set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/engine) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) diff --git a/dpnp/backend/extensions/rng/device/engine/base_builder.hpp b/dpnp/backend/extensions/rng/device/engine/builder/base.hpp similarity index 96% rename from dpnp/backend/extensions/rng/device/engine/base_builder.hpp rename to dpnp/backend/extensions/rng/device/engine/builder/base.hpp index 2ce7230ae471..a1040fe8ef29 100644 --- a/dpnp/backend/extensions/rng/device/engine/base_builder.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/base.hpp @@ -28,7 +28,7 @@ #include "engine_base.hpp" -namespace dpnp::backend::ext::rng::device::engine +namespace dpnp::backend::ext::rng::device::engine::builder { template class BaseBuilder { @@ -102,4 +102,4 @@ class BaseBuilder { std::cout << std::endl; } }; -} // dpnp::backend::ext::rng::device::engine +} // dpnp::backend::ext::rng::device::engine::builder diff --git a/dpnp/backend/extensions/rng/device/engine/engine_builder.hpp b/dpnp/backend/extensions/rng/device/engine/builder/builder.hpp similarity index 87% rename from dpnp/backend/extensions/rng/device/engine/engine_builder.hpp rename to dpnp/backend/extensions/rng/device/engine/builder/builder.hpp index 78deeedbca69..d1bc4643ddc7 100644 --- a/dpnp/backend/extensions/rng/device/engine/engine_builder.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/builder.hpp @@ -26,13 +26,13 @@ #pragma once -namespace dpnp::backend::ext::rng::device::engine +namespace dpnp::backend::ext::rng::device::engine::builder { template class Builder {}; -} // dpnp::backend::ext::rng::device::engine +} // dpnp::backend::ext::rng::device::engine::builder -#include "mrg32k3a_builder.hpp" -#include "philox4x32x10_builder.hpp" -#include "mcg31m1_builder.hpp" -#include "mcg59_builder.hpp" +#include "mrg32k3a.hpp" +#include "philox4x32x10.hpp" +#include "mcg31m1.hpp" +#include "mcg59.hpp" diff --git a/dpnp/backend/extensions/rng/device/engine/mcg31m1_builder.hpp b/dpnp/backend/extensions/rng/device/engine/builder/mcg31m1.hpp similarity index 93% rename from dpnp/backend/extensions/rng/device/engine/mcg31m1_builder.hpp rename to dpnp/backend/extensions/rng/device/engine/builder/mcg31m1.hpp index 24a68c8cdf03..411ac02781ce 100644 --- a/dpnp/backend/extensions/rng/device/engine/mcg31m1_builder.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/mcg31m1.hpp @@ -28,9 +28,9 @@ #include #include "engine_base.hpp" -#include "base_builder.hpp" +#include "base.hpp" -namespace dpnp::backend::ext::rng::device::engine +namespace dpnp::backend::ext::rng::device::engine::builder { namespace mkl_rng_dev = oneapi::mkl::rng::device; @@ -41,4 +41,4 @@ class Builder> : public BaseBuilder(engine) {} }; -} // dpnp::backend::ext::rng::device::engine +} // dpnp::backend::ext::rng::device::engine::builder diff --git a/dpnp/backend/extensions/rng/device/engine/mcg59_builder.hpp b/dpnp/backend/extensions/rng/device/engine/builder/mcg59.hpp similarity index 93% rename from dpnp/backend/extensions/rng/device/engine/mcg59_builder.hpp rename to dpnp/backend/extensions/rng/device/engine/builder/mcg59.hpp index a16e3aeaf4ee..17761031fd41 100644 --- a/dpnp/backend/extensions/rng/device/engine/mcg59_builder.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/mcg59.hpp @@ -28,9 +28,9 @@ #include #include "engine_base.hpp" -#include "base_builder.hpp" +#include "base.hpp" -namespace dpnp::backend::ext::rng::device::engine +namespace dpnp::backend::ext::rng::device::engine::builder { namespace mkl_rng_dev = oneapi::mkl::rng::device; @@ -41,4 +41,4 @@ class Builder> : public BaseBuilder(engine) {} }; -} // dpnp::backend::ext::rng::device::engine +} // dpnp::backend::ext::rng::device::engine::builder diff --git a/dpnp/backend/extensions/rng/device/engine/mrg32k3a_builder.hpp b/dpnp/backend/extensions/rng/device/engine/builder/mrg32k3a.hpp similarity index 93% rename from dpnp/backend/extensions/rng/device/engine/mrg32k3a_builder.hpp rename to dpnp/backend/extensions/rng/device/engine/builder/mrg32k3a.hpp index e7ef86cd3bab..2e025ad23178 100644 --- a/dpnp/backend/extensions/rng/device/engine/mrg32k3a_builder.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/mrg32k3a.hpp @@ -28,9 +28,9 @@ #include #include "engine_base.hpp" -#include "base_builder.hpp" +#include "base.hpp" -namespace dpnp::backend::ext::rng::device::engine +namespace dpnp::backend::ext::rng::device::engine::builder { namespace mkl_rng_dev = oneapi::mkl::rng::device; @@ -41,4 +41,4 @@ class Builder> : public BaseBuilder(engine) {} }; -} // dpnp::backend::ext::rng::device::engine +} // dpnp::backend::ext::rng::device::engine::builder diff --git a/dpnp/backend/extensions/rng/device/engine/philox4x32x10_builder.hpp b/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp similarity index 93% rename from dpnp/backend/extensions/rng/device/engine/philox4x32x10_builder.hpp rename to dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp index 7186ae7c3a3b..5c3cf183413a 100644 --- a/dpnp/backend/extensions/rng/device/engine/philox4x32x10_builder.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp @@ -28,9 +28,9 @@ #include #include "engine_base.hpp" -#include "base_builder.hpp" +#include "base.hpp" -namespace dpnp::backend::ext::rng::device::engine +namespace dpnp::backend::ext::rng::device::engine::builder { namespace mkl_rng_dev = oneapi::mkl::rng::device; @@ -41,4 +41,4 @@ class Builder> : public BaseBuilder(engine) {} }; -} // dpnp::backend::ext::rng::device::engine +} // dpnp::backend::ext::rng::device::engine::builder diff --git a/dpnp/backend/extensions/rng/device/gaussian.cpp b/dpnp/backend/extensions/rng/device/gaussian.cpp index 284ad4ff86db..9cfb5675b9e9 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.cpp +++ b/dpnp/backend/extensions/rng/device/gaussian.cpp @@ -37,7 +37,7 @@ #include "gaussian.hpp" #include "engine/engine_base.hpp" -#include "engine/engine_builder.hpp" +#include "engine/builder/builder.hpp" #include "dispatch/matrix.hpp" #include "dispatch/table_builder.hpp" @@ -127,7 +127,7 @@ static sycl::event gaussian_impl(engine::EngineBase *engine, distr_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); - using EngineBuilderT = engine::Builder; + using EngineBuilderT = engine::builder::Builder; EngineBuilderT eng_builder(engine); eng_builder.print(); // TODO: remove From b6554d0504fa01012308649a7da8dbcf85f56ac4 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 15 Feb 2024 15:24:53 +0100 Subject: [PATCH 09/16] Added mcg59 engine --- .../rng/device/dispatch/table_builder.hpp | 3 +- .../{engine_base.hpp => base_engine.hpp} | 1 + .../builder/{base.hpp => base_builder.hpp} | 22 ++++++- .../rng/device/engine/builder/mcg31m1.hpp | 4 +- .../rng/device/engine/builder/mcg59.hpp | 4 +- .../rng/device/engine/builder/mrg32k3a.hpp | 4 +- .../device/engine/builder/philox4x32x10.hpp | 4 +- .../rng/device/engine/mcg59_engine.hpp | 61 +++++++++++++++++++ .../rng/device/engine/mrg32k3a_engine.hpp | 2 +- .../extensions/rng/device/gaussian.cpp | 2 +- .../extensions/rng/device/gaussian.hpp | 2 +- dpnp/backend/extensions/rng/device/rng_py.cpp | 4 ++ 12 files changed, 98 insertions(+), 15 deletions(-) rename dpnp/backend/extensions/rng/device/engine/{engine_base.hpp => base_engine.hpp} (99%) rename dpnp/backend/extensions/rng/device/engine/builder/{base.hpp => base_builder.hpp} (80%) create mode 100644 dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp diff --git a/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp b/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp index 247f968e3196..a1a7883185ce 100644 --- a/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp +++ b/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp @@ -79,7 +79,8 @@ class Dispatch3DTableBuilder void populate(funcPtrT table[][_no_of_types][_no_of_methods]) const { - const auto map_by_engine = {table_per_type_and_method>()}; + const auto map_by_engine = {table_per_type_and_method>(), + table_per_type_and_method>()}; assert(map_by_engine.size() == _no_of_engines); std::uint16_t engine_id = 0; diff --git a/dpnp/backend/extensions/rng/device/engine/engine_base.hpp b/dpnp/backend/extensions/rng/device/engine/base_engine.hpp similarity index 99% rename from dpnp/backend/extensions/rng/device/engine/engine_base.hpp rename to dpnp/backend/extensions/rng/device/engine/base_engine.hpp index 1e85e22ebbe4..c7f66ed16e77 100644 --- a/dpnp/backend/extensions/rng/device/engine/engine_base.hpp +++ b/dpnp/backend/extensions/rng/device/engine/base_engine.hpp @@ -34,6 +34,7 @@ class EngineType { public: enum Type : std::uint8_t { MRG32k3a = 0, + MCG59, Base, // must be the last always }; diff --git a/dpnp/backend/extensions/rng/device/engine/builder/base.hpp b/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp similarity index 80% rename from dpnp/backend/extensions/rng/device/engine/builder/base.hpp rename to dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp index a1040fe8ef29..f2aa61fb8765 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/base.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp @@ -25,7 +25,11 @@ #pragma once -#include "engine_base.hpp" +#include "base_engine.hpp" + +// TODO: remove the include once issue in MKL is resolved +#include +namespace mkl_rng_dev = oneapi::mkl::rng::device; namespace dpnp::backend::ext::rng::device::engine::builder @@ -71,7 +75,13 @@ class BaseBuilder { { switch (no_of_seeds) { case 1: { - return EngineT({seeds[0]}, {offsets[0]}); + if constexpr (std::is_same_v>) { + // issue with mcg59<>() constructor which breaks compilation + return EngineT(seeds[0], offsets[0]); + } + else { + return EngineT({seeds[0]}, offsets[0]); + } } // TODO: implement full switch default: @@ -84,7 +94,13 @@ class BaseBuilder { { switch (no_of_seeds) { case 1: { - return EngineT({seeds[0]}, offset); + if constexpr (std::is_same_v>) { + // issue with mcg59<>() constructor which breaks compilation + return EngineT(seeds[0], offsets[0]); + } + else { + return EngineT({seeds[0]}, {offset}); + } } // TODO: implement full switch default: diff --git a/dpnp/backend/extensions/rng/device/engine/builder/mcg31m1.hpp b/dpnp/backend/extensions/rng/device/engine/builder/mcg31m1.hpp index 411ac02781ce..8e99afde372e 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/mcg31m1.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/mcg31m1.hpp @@ -27,8 +27,8 @@ #include -#include "engine_base.hpp" -#include "base.hpp" +#include "base_engine.hpp" +#include "base_builder.hpp" namespace dpnp::backend::ext::rng::device::engine::builder { diff --git a/dpnp/backend/extensions/rng/device/engine/builder/mcg59.hpp b/dpnp/backend/extensions/rng/device/engine/builder/mcg59.hpp index 17761031fd41..d4bc754d8bde 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/mcg59.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/mcg59.hpp @@ -27,8 +27,8 @@ #include -#include "engine_base.hpp" -#include "base.hpp" +#include "base_engine.hpp" +#include "base_builder.hpp" namespace dpnp::backend::ext::rng::device::engine::builder { diff --git a/dpnp/backend/extensions/rng/device/engine/builder/mrg32k3a.hpp b/dpnp/backend/extensions/rng/device/engine/builder/mrg32k3a.hpp index 2e025ad23178..940df64f3a02 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/mrg32k3a.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/mrg32k3a.hpp @@ -27,8 +27,8 @@ #include -#include "engine_base.hpp" -#include "base.hpp" +#include "base_engine.hpp" +#include "base_builder.hpp" namespace dpnp::backend::ext::rng::device::engine::builder { diff --git a/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp b/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp index 5c3cf183413a..b7a184de26d0 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp @@ -27,8 +27,8 @@ #include -#include "engine_base.hpp" -#include "base.hpp" +#include "base_engine.hpp" +#include "base_builder.hpp" namespace dpnp::backend::ext::rng::device::engine::builder { diff --git a/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp b/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp new file mode 100644 index 000000000000..f2e16ed84a8d --- /dev/null +++ b/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp @@ -0,0 +1,61 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include "base_engine.hpp" + + +namespace dpnp::backend::ext::rng::device::engine +{ +class MCG59 : public EngineBase { +private: + sycl::queue q_; + std::vector seed_vec; + std::vector offset_vec; + +public: + MCG59(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : q_(q) { + seed_vec.push_back(seed); + offset_vec.push_back(offset); + } + + sycl::queue &get_queue() override { + return q_; + } + + virtual EngineType get_type() const noexcept override { + return EngineType::MCG59; + } + + virtual std::vector get_seeds() const noexcept override { + return seed_vec; + } + + virtual std::vector get_offsets() const noexcept override { + return offset_vec; + } +}; +} // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp b/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp index 413ff43540a5..d3268c5631f1 100644 --- a/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp @@ -25,7 +25,7 @@ #pragma once -#include "engine_base.hpp" +#include "base_engine.hpp" namespace dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/gaussian.cpp b/dpnp/backend/extensions/rng/device/gaussian.cpp index 9cfb5675b9e9..7109f62c9e61 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.cpp +++ b/dpnp/backend/extensions/rng/device/gaussian.cpp @@ -36,7 +36,7 @@ #include "common_impl.hpp" #include "gaussian.hpp" -#include "engine/engine_base.hpp" +#include "engine/base_engine.hpp" #include "engine/builder/builder.hpp" #include "dispatch/matrix.hpp" diff --git a/dpnp/backend/extensions/rng/device/gaussian.hpp b/dpnp/backend/extensions/rng/device/gaussian.hpp index 6c7baee6c1af..b2b771ab1f34 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.hpp +++ b/dpnp/backend/extensions/rng/device/gaussian.hpp @@ -31,7 +31,7 @@ #include -#include "engine/engine_base.hpp" +#include "engine/base_engine.hpp" namespace dpnp::backend::ext::rng::device diff --git a/dpnp/backend/extensions/rng/device/rng_py.cpp b/dpnp/backend/extensions/rng/device/rng_py.cpp index 3cc418fb495f..61c1714eced4 100644 --- a/dpnp/backend/extensions/rng/device/rng_py.cpp +++ b/dpnp/backend/extensions/rng/device/rng_py.cpp @@ -35,7 +35,9 @@ #include #include "gaussian.hpp" + #include "engine/mrg32k3a_engine.hpp" +#include "engine/mcg59_engine.hpp" namespace mkl_rng = oneapi::mkl::rng; namespace rng_dev_ext = dpnp::backend::ext::rng::device; @@ -82,6 +84,8 @@ PYBIND11_MODULE(_rng_dev_impl, m) py::class_(m, "MRG32k3a") .def(py::init()); + py::class_(m, "MCG59") + .def(py::init()); m.def("_gaussian", &rng_dev_ext::gaussian, "", From 1a807bed9c702476b08f5ce71379f305a1c1d08b Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 15 Feb 2024 16:47:23 +0100 Subject: [PATCH 10/16] Added philox4x32x10 and mcg31m1 engines --- .../rng/device/dispatch/table_builder.hpp | 2 + .../rng/device/engine/base_engine.hpp | 2 + .../device/engine/builder/philox4x32x10.hpp | 2 +- .../rng/device/engine/mcg31m1_engine.hpp | 61 +++++++++++++++++++ .../rng/device/engine/mcg59_engine.hpp | 4 +- .../rng/device/engine/mrg32k3a_engine.hpp | 4 +- .../device/engine/philox4x32x10_engine.hpp | 61 +++++++++++++++++++ dpnp/backend/extensions/rng/device/rng_py.cpp | 9 +++ 8 files changed, 140 insertions(+), 5 deletions(-) create mode 100644 dpnp/backend/extensions/rng/device/engine/mcg31m1_engine.hpp create mode 100644 dpnp/backend/extensions/rng/device/engine/philox4x32x10_engine.hpp diff --git a/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp b/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp index a1a7883185ce..9a6c09f56228 100644 --- a/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp +++ b/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp @@ -80,6 +80,8 @@ class Dispatch3DTableBuilder void populate(funcPtrT table[][_no_of_types][_no_of_methods]) const { const auto map_by_engine = {table_per_type_and_method>(), + table_per_type_and_method>(), + table_per_type_and_method>(), table_per_type_and_method>()}; assert(map_by_engine.size() == _no_of_engines); diff --git a/dpnp/backend/extensions/rng/device/engine/base_engine.hpp b/dpnp/backend/extensions/rng/device/engine/base_engine.hpp index c7f66ed16e77..c9cdf15d037b 100644 --- a/dpnp/backend/extensions/rng/device/engine/base_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/base_engine.hpp @@ -34,6 +34,8 @@ class EngineType { public: enum Type : std::uint8_t { MRG32k3a = 0, + PHILOX4x32x10, + MCG31M1, MCG59, Base, // must be the last always }; diff --git a/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp b/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp index b7a184de26d0..b0abc5c86b86 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp @@ -39,6 +39,6 @@ class Builder> : public BaseBuilder; - Builder(EngineBase *engine) : BaseBuilder(engine) {} + Builder(EngineBase *engine) : BaseBuilder(engine) {} }; } // dpnp::backend::ext::rng::device::engine::builder diff --git a/dpnp/backend/extensions/rng/device/engine/mcg31m1_engine.hpp b/dpnp/backend/extensions/rng/device/engine/mcg31m1_engine.hpp new file mode 100644 index 000000000000..d6cfa2221197 --- /dev/null +++ b/dpnp/backend/extensions/rng/device/engine/mcg31m1_engine.hpp @@ -0,0 +1,61 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include "base_engine.hpp" + + +namespace dpnp::backend::ext::rng::device::engine +{ +class MCG31M1 : public EngineBase { +private: + sycl::queue q_; + std::vector seed_vec{}; + std::vector offset_vec{}; + +public: + MCG31M1(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : q_(q) { + seed_vec.push_back(seed); + offset_vec.push_back(offset); + } + + sycl::queue &get_queue() override { + return q_; + } + + virtual EngineType get_type() const noexcept override { + return EngineType::MCG31M1; + } + + virtual std::vector get_seeds() const noexcept override { + return seed_vec; + } + + virtual std::vector get_offsets() const noexcept override { + return offset_vec; + } +}; +} // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp b/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp index f2e16ed84a8d..488f7151a1a7 100644 --- a/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp @@ -33,8 +33,8 @@ namespace dpnp::backend::ext::rng::device::engine class MCG59 : public EngineBase { private: sycl::queue q_; - std::vector seed_vec; - std::vector offset_vec; + std::vector seed_vec{}; + std::vector offset_vec{}; public: MCG59(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : q_(q) { diff --git a/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp b/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp index d3268c5631f1..5edd0a13db3b 100644 --- a/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp @@ -33,8 +33,8 @@ namespace dpnp::backend::ext::rng::device::engine class MRG32k3a : public EngineBase { private: sycl::queue q_; - std::vector seed_vec; - std::vector offset_vec; + std::vector seed_vec{}; + std::vector offset_vec{}; public: MRG32k3a(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : q_(q) { diff --git a/dpnp/backend/extensions/rng/device/engine/philox4x32x10_engine.hpp b/dpnp/backend/extensions/rng/device/engine/philox4x32x10_engine.hpp new file mode 100644 index 000000000000..17a1b07a53bb --- /dev/null +++ b/dpnp/backend/extensions/rng/device/engine/philox4x32x10_engine.hpp @@ -0,0 +1,61 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include "base_engine.hpp" + + +namespace dpnp::backend::ext::rng::device::engine +{ +class PHILOX4x32x10 : public EngineBase { +private: + sycl::queue q_; + std::vector seed_vec{}; + std::vector offset_vec{}; + +public: + PHILOX4x32x10(sycl::queue &q, std::uint64_t seed, std::uint64_t offset = 0) : q_(q) { + seed_vec.push_back(seed); + offset_vec.push_back(offset); + } + + sycl::queue &get_queue() override { + return q_; + } + + virtual EngineType get_type() const noexcept override { + return EngineType::PHILOX4x32x10; + } + + virtual std::vector get_seeds() const noexcept override { + return seed_vec; + } + + virtual std::vector get_offsets() const noexcept override { + return offset_vec; + } +}; +} // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/rng_py.cpp b/dpnp/backend/extensions/rng/device/rng_py.cpp index 61c1714eced4..ca542db20f10 100644 --- a/dpnp/backend/extensions/rng/device/rng_py.cpp +++ b/dpnp/backend/extensions/rng/device/rng_py.cpp @@ -37,8 +37,11 @@ #include "gaussian.hpp" #include "engine/mrg32k3a_engine.hpp" +#include "engine/philox4x32x10_engine.hpp" +#include "engine/mcg31m1_engine.hpp" #include "engine/mcg59_engine.hpp" + namespace mkl_rng = oneapi::mkl::rng; namespace rng_dev_ext = dpnp::backend::ext::rng::device; namespace rng_dev_engine = dpnp::backend::ext::rng::device::engine; @@ -84,6 +87,12 @@ PYBIND11_MODULE(_rng_dev_impl, m) py::class_(m, "MRG32k3a") .def(py::init()); + py::class_(m, "PHILOX4x32x10") + .def(py::init()); + + py::class_(m, "MCG31M1") + .def(py::init()); + py::class_(m, "MCG59") .def(py::init()); From 6c78e4aed9f37d156204ebb5df832dda3ad422cc Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 15 Feb 2024 18:31:11 +0100 Subject: [PATCH 11/16] Reworked engines classes whech bind with python --- .../extensions/rng/device/common_impl.hpp | 8 ++- .../rng/device/engine/base_engine.hpp | 42 +++++++++++++-- .../rng/device/engine/mcg31m1_engine.hpp | 24 ++------- .../rng/device/engine/mcg59_engine.hpp | 24 ++------- .../rng/device/engine/mrg32k3a_engine.hpp | 30 ++++------- .../device/engine/philox4x32x10_engine.hpp | 30 ++++------- .../extensions/rng/device/gaussian.cpp | 28 +++------- .../extensions/rng/device/gaussian.hpp | 6 +-- dpnp/backend/extensions/rng/device/rng_py.cpp | 53 ++++++++++--------- 9 files changed, 102 insertions(+), 143 deletions(-) diff --git a/dpnp/backend/extensions/rng/device/common_impl.hpp b/dpnp/backend/extensions/rng/device/common_impl.hpp index c2f573a233ba..5b7d946069ec 100644 --- a/dpnp/backend/extensions/rng/device/common_impl.hpp +++ b/dpnp/backend/extensions/rng/device/common_impl.hpp @@ -30,9 +30,6 @@ #include #include -// dpctl tensor headers -// #include "utils/offset_utils.hpp" - namespace dpnp { namespace backend @@ -71,14 +68,15 @@ struct RngContigFunctor void operator()(sycl::nd_item<1> nd_it) const { - auto global_id = nd_it.get_global_id(); + // auto global_id = nd_it.get_global_id(); auto sg = nd_it.get_sub_group(); const std::uint8_t sg_size = sg.get_local_range()[0]; const std::uint8_t max_sg_size = sg.get_max_local_range()[0]; using EngineT = typename EngineBuilderT::EngineType; - EngineT engine = engine_(nelems_ * global_id); // offset is questionable... + // EngineT engine = engine_(nelems_ * global_id); // offset is questionable... + EngineT engine = engine_(); using DistrT = typename DistributorBuilderT::distr_type; DistrT distr = distr_(); diff --git a/dpnp/backend/extensions/rng/device/engine/base_engine.hpp b/dpnp/backend/extensions/rng/device/engine/base_engine.hpp index c9cdf15d037b..b8a0ecda4257 100644 --- a/dpnp/backend/extensions/rng/device/engine/base_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/base_engine.hpp @@ -59,20 +59,52 @@ class EngineType { constexpr int no_of_engines = EngineType::base_id(); class EngineBase { +private: + sycl::queue q_{}; + std::vector seed_vec{}; + std::vector offset_vec{}; + public: + EngineBase() {} + + EngineBase(sycl::queue &q, std::uint64_t seed, std::uint64_t offset) : + q_(q), seed_vec(1, seed), offset_vec(1, offset) {} + + EngineBase(sycl::queue &q, std::vector &seeds, std::uint64_t offset) : + q_(q), seed_vec(seeds), offset_vec(1, offset) {} + + EngineBase(sycl::queue &q, std::vector &seeds, std::uint64_t offset) : q_(q), offset_vec(1, offset) { + seed_vec.reserve(seeds.size()); + seed_vec.assign(seeds.begin(), seeds.end()); + } + + EngineBase(sycl::queue &q, std::uint64_t seed, std::vector &offsets) : + q_(q), seed_vec(1, seed), offset_vec(offsets) {} + + EngineBase(sycl::queue &q, std::vector &seeds, std::vector &offsets) : + q_(q), seed_vec(seeds), offset_vec(offsets) {} + + EngineBase(sycl::queue &q, std::vector &seeds, std::vector &offsets) : q_(q), offset_vec(offsets) { + seed_vec.reserve(seeds.size()); + seed_vec.assign(seeds.begin(), seeds.end()); + } + virtual ~EngineBase() {} - virtual sycl::queue &get_queue() = 0; virtual EngineType get_type() const noexcept { return EngineType::Base; } - virtual std::vector get_seeds() const noexcept { - return std::vector(); + sycl::queue &get_queue() noexcept { + return q_; + } + + std::vector& get_seeds() noexcept { + return seed_vec; } - virtual std::vector get_offsets() const noexcept { - return std::vector(); + std::vector& get_offsets() noexcept { + return offset_vec; } }; } // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/mcg31m1_engine.hpp b/dpnp/backend/extensions/rng/device/engine/mcg31m1_engine.hpp index d6cfa2221197..2a8cdee310c7 100644 --- a/dpnp/backend/extensions/rng/device/engine/mcg31m1_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/mcg31m1_engine.hpp @@ -31,31 +31,15 @@ namespace dpnp::backend::ext::rng::device::engine { class MCG31M1 : public EngineBase { -private: - sycl::queue q_; - std::vector seed_vec{}; - std::vector offset_vec{}; - public: - MCG31M1(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : q_(q) { - seed_vec.push_back(seed); - offset_vec.push_back(offset); - } + MCG31M1(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : + EngineBase(q, seed, offset) {} - sycl::queue &get_queue() override { - return q_; - } + MCG31M1(sycl::queue &q, std::vector &seeds, std::uint64_t offset = 0) : + EngineBase(q, seeds, offset) {} virtual EngineType get_type() const noexcept override { return EngineType::MCG31M1; } - - virtual std::vector get_seeds() const noexcept override { - return seed_vec; - } - - virtual std::vector get_offsets() const noexcept override { - return offset_vec; - } }; } // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp b/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp index 488f7151a1a7..313a351426b6 100644 --- a/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp @@ -31,31 +31,15 @@ namespace dpnp::backend::ext::rng::device::engine { class MCG59 : public EngineBase { -private: - sycl::queue q_; - std::vector seed_vec{}; - std::vector offset_vec{}; - public: - MCG59(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : q_(q) { - seed_vec.push_back(seed); - offset_vec.push_back(offset); - } + MCG59(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : + EngineBase(q, seed, offset) {} - sycl::queue &get_queue() override { - return q_; - } + MCG59(sycl::queue &q, std::vector &seeds, std::uint64_t offset = 0) : + EngineBase(q, seeds, offset) {} virtual EngineType get_type() const noexcept override { return EngineType::MCG59; } - - virtual std::vector get_seeds() const noexcept override { - return seed_vec; - } - - virtual std::vector get_offsets() const noexcept override { - return offset_vec; - } }; } // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp b/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp index 5edd0a13db3b..a0fcad167afb 100644 --- a/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp @@ -31,31 +31,21 @@ namespace dpnp::backend::ext::rng::device::engine { class MRG32k3a : public EngineBase { -private: - sycl::queue q_; - std::vector seed_vec{}; - std::vector offset_vec{}; - public: - MRG32k3a(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : q_(q) { - seed_vec.push_back(seed); - offset_vec.push_back(offset); - } + MRG32k3a(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : + EngineBase(q, seed, offset) {} - sycl::queue &get_queue() override { - return q_; - } + MRG32k3a(sycl::queue &q, std::vector &seeds, std::uint64_t offset = 0) : + EngineBase(q, seeds, offset) {} - virtual EngineType get_type() const noexcept override { - return EngineType::MRG32k3a; - } + MRG32k3a(sycl::queue &q, std::uint32_t seed, std::vector &offsets) : + EngineBase(q, seed, offsets) {} - virtual std::vector get_seeds() const noexcept override { - return seed_vec; - } + MRG32k3a(sycl::queue &q, std::vector &seeds, std::vector &offsets) : + EngineBase(q, seeds, offsets) {} - virtual std::vector get_offsets() const noexcept override { - return offset_vec; + virtual EngineType get_type() const noexcept override { + return EngineType::MRG32k3a; } }; } // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/philox4x32x10_engine.hpp b/dpnp/backend/extensions/rng/device/engine/philox4x32x10_engine.hpp index 17a1b07a53bb..bba8a5c63eb2 100644 --- a/dpnp/backend/extensions/rng/device/engine/philox4x32x10_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/philox4x32x10_engine.hpp @@ -31,31 +31,21 @@ namespace dpnp::backend::ext::rng::device::engine { class PHILOX4x32x10 : public EngineBase { -private: - sycl::queue q_; - std::vector seed_vec{}; - std::vector offset_vec{}; - public: - PHILOX4x32x10(sycl::queue &q, std::uint64_t seed, std::uint64_t offset = 0) : q_(q) { - seed_vec.push_back(seed); - offset_vec.push_back(offset); - } + PHILOX4x32x10(sycl::queue &q, std::uint64_t seed, std::uint64_t offset = 0) : + EngineBase(q, seed, offset) {} - sycl::queue &get_queue() override { - return q_; - } + PHILOX4x32x10(sycl::queue &q, std::vector &seeds, std::uint64_t offset = 0) : + EngineBase(q, seeds, offset) {} - virtual EngineType get_type() const noexcept override { - return EngineType::PHILOX4x32x10; - } + PHILOX4x32x10(sycl::queue &q, std::uint64_t seed, std::vector &offsets) : + EngineBase(q, seed, offsets) {} - virtual std::vector get_seeds() const noexcept override { - return seed_vec; - } + PHILOX4x32x10(sycl::queue &q, std::vector &seeds, std::vector &offsets) : + EngineBase(q, seeds, offsets) {} - virtual std::vector get_offsets() const noexcept override { - return offset_vec; + virtual EngineType get_type() const noexcept override { + return EngineType::PHILOX4x32x10; } }; } // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/gaussian.cpp b/dpnp/backend/extensions/rng/device/gaussian.cpp index 7109f62c9e61..0dc81f75a369 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.cpp +++ b/dpnp/backend/extensions/rng/device/gaussian.cpp @@ -26,38 +26,26 @@ #include // dpctl tensor headers -// #include "utils/memory_overlap.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" - -// dpctl tensor headers #include "kernels/alignment.hpp" -#include "common_impl.hpp" #include "gaussian.hpp" +#include "common_impl.hpp" -#include "engine/base_engine.hpp" #include "engine/builder/builder.hpp" #include "dispatch/matrix.hpp" #include "dispatch/table_builder.hpp" -namespace dpnp -{ -namespace backend -{ -namespace ext -{ -namespace rng -{ -namespace device +namespace dpnp::backend::ext::rng::device { namespace dpctl_krn_ns = dpctl::tensor::kernels::alignment_utils; namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +namespace dpctl_tu_ns = dpctl::tensor::type_utils; namespace mkl_rng_dev = oneapi::mkl::rng::device; namespace py = pybind11; -namespace type_utils = dpctl::tensor::type_utils; using dpctl_krn_ns::disabled_sg_loadstore_wrapper_krn; using dpctl_krn_ns::is_aligned; @@ -109,7 +97,7 @@ static sycl::event gaussian_impl(engine::EngineBase *engine, const std::vector &depends) { auto &exec_q = engine->get_queue(); - type_utils::validate_type_for_device(exec_q); + dpctl_tu_ns::validate_type_for_device(exec_q); DataT *out = reinterpret_cast(out_ptr); DataT mean = static_cast(mean_val); @@ -242,13 +230,9 @@ struct GaussianContigFactory } }; -void init_gaussian_dispatch_table(void) +void init_gaussian_dispatch_3d_table(void) { dispatch::Dispatch3DTableBuilder contig; contig.populate(gaussian_dispatch_table); } -} // namespace device -} // namespace rng -} // namespace ext -} // namespace backend -} // namespace dpnp +} // dpnp::backend::ext::rng::device diff --git a/dpnp/backend/extensions/rng/device/gaussian.hpp b/dpnp/backend/extensions/rng/device/gaussian.hpp index b2b771ab1f34..581222c96a78 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.hpp +++ b/dpnp/backend/extensions/rng/device/gaussian.hpp @@ -25,10 +25,6 @@ #pragma once -#include -#include -#include - #include #include "engine/base_engine.hpp" @@ -44,5 +40,5 @@ extern std::pair gaussian(engine::EngineBase *engine, dpctl::tensor::usm_ndarray res, const std::vector &depends = {}); -extern void init_gaussian_dispatch_table(void); +extern void init_gaussian_dispatch_3d_table(void); } // namespace dpnp::backend::ext::rng::device diff --git a/dpnp/backend/extensions/rng/device/rng_py.cpp b/dpnp/backend/extensions/rng/device/rng_py.cpp index ca542db20f10..9cbb3edba5b4 100644 --- a/dpnp/backend/extensions/rng/device/rng_py.cpp +++ b/dpnp/backend/extensions/rng/device/rng_py.cpp @@ -31,7 +31,6 @@ #include #include - #include #include "gaussian.hpp" @@ -47,54 +46,56 @@ namespace rng_dev_ext = dpnp::backend::ext::rng::device; namespace rng_dev_engine = dpnp::backend::ext::rng::device::engine; namespace py = pybind11; -// populate dispatch vectors -void init_dispatch_vectors(void) -{ - // rng_dev_ext::init_gaussian_dispatch_vector(); -} -// populate dispatch tables -void init_dispatch_tables(void) +// populate dispatch 3-D tables +void init_dispatch_3d_tables(void) { - rng_dev_ext::init_gaussian_dispatch_table(); + rng_dev_ext::init_gaussian_dispatch_3d_table(); } class PyEngineBase : public rng_dev_engine::EngineBase { public: - /* Inherit the constructors */ - using rng_dev_engine::EngineBase::EngineBase; - - /* Trampoline (need one for each virtual function) */ - sycl::queue &get_queue() override { - PYBIND11_OVERRIDE_PURE( - sycl::queue&, /* Return type */ - EngineBase, /* Parent class */ - get_queue, /* Name of function in C++ (must match Python name) */ - ); - } + // inherit the constructor + using EngineBase::EngineBase; + + // trampoline (need one for each virtual function) + // sycl::queue &get_queue() { + // PYBIND11_OVERRIDE_PURE( + // sycl::queue&, /* Return type */ + // EngineBase, /* Parent class */ + // get_queue, /* Name of function in C++ (must match Python name) */ + // ); + // } }; PYBIND11_MODULE(_rng_dev_impl, m) { - init_dispatch_vectors(); - init_dispatch_tables(); + init_dispatch_3d_tables(); py::class_(m, "EngineBase") .def(py::init<>()) .def("get_queue", &rng_dev_engine::EngineBase::get_queue); py::class_(m, "MRG32k3a") - .def(py::init()); + .def(py::init(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) + .def(py::init &, std::uint64_t>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) + .def(py::init &>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = py::list()) + .def(py::init &, std::vector &>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = py::list()); py::class_(m, "PHILOX4x32x10") - .def(py::init()); + .def(py::init(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) + .def(py::init &, std::uint64_t>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) + .def(py::init &>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = py::list()) + .def(py::init &, std::vector &>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = py::list()); py::class_(m, "MCG31M1") - .def(py::init()); + .def(py::init(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) + .def(py::init &, std::uint64_t>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0); py::class_(m, "MCG59") - .def(py::init()); + .def(py::init(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) + .def(py::init &, std::uint64_t>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0); m.def("_gaussian", &rng_dev_ext::gaussian, "", From 8b4d96a8ce168ac4507b1b68ab5afd7161b8bced Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 15 Mar 2024 20:39:49 +0100 Subject: [PATCH 12/16] Corrected offset usage --- .../extensions/rng/device/common_impl.hpp | 30 +++++++++---------- .../rng/device/engine/base_engine.hpp | 29 +++++++++++++++--- .../device/engine/builder/base_builder.hpp | 12 ++++---- 3 files changed, 45 insertions(+), 26 deletions(-) diff --git a/dpnp/backend/extensions/rng/device/common_impl.hpp b/dpnp/backend/extensions/rng/device/common_impl.hpp index 5b7d946069ec..405d70820232 100644 --- a/dpnp/backend/extensions/rng/device/common_impl.hpp +++ b/dpnp/backend/extensions/rng/device/common_impl.hpp @@ -58,38 +58,36 @@ struct RngContigFunctor EngineBuilderT engine_; DistributorBuilderT distr_; DataT * const res_ = nullptr; - const size_t nelems_; + const std::size_t nelems_; public: - RngContigFunctor(EngineBuilderT& engine, DistributorBuilderT& distr, DataT *res, const size_t n_elems) + RngContigFunctor(EngineBuilderT& engine, DistributorBuilderT& distr, DataT *res, const std::size_t n_elems) : engine_(engine), distr_(distr), res_(res), nelems_(n_elems) { } void operator()(sycl::nd_item<1> nd_it) const { - // auto global_id = nd_it.get_global_id(); - auto sg = nd_it.get_sub_group(); const std::uint8_t sg_size = sg.get_local_range()[0]; const std::uint8_t max_sg_size = sg.get_max_local_range()[0]; using EngineT = typename EngineBuilderT::EngineType; - // EngineT engine = engine_(nelems_ * global_id); // offset is questionable... - EngineT engine = engine_(); - using DistrT = typename DistributorBuilderT::distr_type; - DistrT distr = distr_(); constexpr std::size_t vec_sz = EngineT::vec_size; + constexpr std::size_t vi_per_wi = vec_sz * items_per_wi; + + EngineT engine = engine_(nd_it.get_global_id() * vi_per_wi); + DistrT distr = distr_(); if constexpr (enable_sg_load) { - const size_t base = items_per_wi * vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); + const std::size_t base = vi_per_wi * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); - if ((sg_size == max_sg_size) && (base + items_per_wi * vec_sz * sg_size < nelems_)) { + if ((sg_size == max_sg_size) && (base + vi_per_wi * sg_size < nelems_)) { #pragma unroll - for (std::uint16_t it = 0; it < items_per_wi * vec_sz; it += vec_sz) { - size_t offset = base + static_cast(it) * static_cast(sg_size); + for (std::uint16_t it = 0; it < vi_per_wi; it += vec_sz) { + std::size_t offset = base + static_cast(it) * static_cast(sg_size); auto out_multi_ptr = sycl::address_space_cast(&res_[offset]); sycl::vec rng_val_vec = mkl_rng_dev::generate(distr, engine); @@ -97,16 +95,16 @@ struct RngContigFunctor } } else { - for (size_t offset = base + sg.get_local_id()[0]; offset < nelems_; offset += sg_size) { + for (std::size_t offset = base + sg.get_local_id()[0]; offset < nelems_; offset += sg_size) { res_[offset] = mkl_rng_dev::generate_single(distr, engine); } } } else { - size_t base = nd_it.get_global_linear_id(); + std::size_t base = nd_it.get_global_linear_id(); - base = (base / sg_size) * sg_size * items_per_wi * vec_sz + (base % sg_size); - for (size_t offset = base; offset < std::min(nelems_, base + sg_size * (items_per_wi * vec_sz)); offset += sg_size) + base = (base / sg_size) * sg_size * vi_per_wi + (base % sg_size); + for (std::size_t offset = base; offset < std::min(nelems_, base + sg_size * vi_per_wi); offset += sg_size) { res_[offset] = mkl_rng_dev::generate_single(distr, engine); } diff --git a/dpnp/backend/extensions/rng/device/engine/base_engine.hpp b/dpnp/backend/extensions/rng/device/engine/base_engine.hpp index b8a0ecda4257..a14a52ebe46e 100644 --- a/dpnp/backend/extensions/rng/device/engine/base_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/base_engine.hpp @@ -56,7 +56,7 @@ class EngineType { }; // A total number of supported engines == EngineType::Base -constexpr int no_of_engines = EngineType::base_id(); +constexpr std::uint8_t no_of_engines = EngineType::base_id(); class EngineBase { private: @@ -64,6 +64,12 @@ class EngineBase { std::vector seed_vec{}; std::vector offset_vec{}; + void validate_vec_size(const std::size_t size) { + if (size > max_vec_n) { + throw std::runtime_error("TODO: add text"); + } + } + public: EngineBase() {} @@ -71,20 +77,32 @@ class EngineBase { q_(q), seed_vec(1, seed), offset_vec(1, offset) {} EngineBase(sycl::queue &q, std::vector &seeds, std::uint64_t offset) : - q_(q), seed_vec(seeds), offset_vec(1, offset) {} + q_(q), seed_vec(seeds), offset_vec(1, offset) { + validate_vec_size(seeds.size()); + } EngineBase(sycl::queue &q, std::vector &seeds, std::uint64_t offset) : q_(q), offset_vec(1, offset) { + validate_vec_size(seeds.size()); + seed_vec.reserve(seeds.size()); seed_vec.assign(seeds.begin(), seeds.end()); } EngineBase(sycl::queue &q, std::uint64_t seed, std::vector &offsets) : - q_(q), seed_vec(1, seed), offset_vec(offsets) {} + q_(q), seed_vec(1, seed), offset_vec(offsets) { + validate_vec_size(offsets.size()); + } EngineBase(sycl::queue &q, std::vector &seeds, std::vector &offsets) : - q_(q), seed_vec(seeds), offset_vec(offsets) {} + q_(q), seed_vec(seeds), offset_vec(offsets) { + validate_vec_size(seeds.size()); + validate_vec_size(offsets.size()); + } EngineBase(sycl::queue &q, std::vector &seeds, std::vector &offsets) : q_(q), offset_vec(offsets) { + validate_vec_size(seeds.size()); + validate_vec_size(offsets.size()); + seed_vec.reserve(seeds.size()); seed_vec.assign(seeds.begin(), seeds.end()); } @@ -106,5 +124,8 @@ class EngineBase { std::vector& get_offsets() noexcept { return offset_vec; } + + // + static constexpr std::uint8_t max_vec_n = 1; }; } // dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp b/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp index f2aa61fb8765..e5735e16975c 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp @@ -37,7 +37,7 @@ namespace dpnp::backend::ext::rng::device::engine::builder template class BaseBuilder { private: - static constexpr std::uint8_t max_n = 10; + static constexpr std::uint8_t max_n = EngineBase::max_vec_n; std::uint8_t no_of_seeds; std::uint8_t no_of_offsets; @@ -75,7 +75,7 @@ class BaseBuilder { { switch (no_of_seeds) { case 1: { - if constexpr (std::is_same_v>) { + if constexpr (std::is_same_v>) { // issue with mcg59<>() constructor which breaks compilation return EngineT(seeds[0], offsets[0]); } @@ -90,16 +90,16 @@ class BaseBuilder { return EngineT(); } - inline auto operator()(OffsetT offset) const + inline auto operator()(const OffsetT offset) const { switch (no_of_seeds) { case 1: { - if constexpr (std::is_same_v>) { + if constexpr (std::is_same_v>) { // issue with mcg59<>() constructor which breaks compilation - return EngineT(seeds[0], offsets[0]); + return EngineT(seeds[0], offsets[0] + offset); } else { - return EngineT({seeds[0]}, {offset}); + return EngineT({seeds[0]}, {offsets[0] + offset}); } } // TODO: implement full switch From 2414022a8d8ab46bafc78c8e203bf3a9a68be60d Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 21 Mar 2024 14:44:13 +0100 Subject: [PATCH 13/16] Exposed vector size of an engine --- .../rng/device/dispatch/table_builder.hpp | 11 +++-- .../device/engine/builder/base_builder.hpp | 1 + .../extensions/rng/device/gaussian.cpp | 46 +++++++++++++------ .../extensions/rng/device/gaussian.hpp | 1 + dpnp/backend/extensions/rng/device/rng_py.cpp | 3 +- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp b/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp index 9a6c09f56228..5c3b932a2afa 100644 --- a/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp +++ b/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp @@ -77,12 +77,13 @@ class Dispatch3DTableBuilder Dispatch3DTableBuilder() = default; ~Dispatch3DTableBuilder() = default; - void populate(funcPtrT table[][_no_of_types][_no_of_methods]) const + template + void populate(funcPtrT table[][_no_of_types][_no_of_methods], std::integer_sequence) const { - const auto map_by_engine = {table_per_type_and_method>(), - table_per_type_and_method>(), - table_per_type_and_method>(), - table_per_type_and_method>()}; + const auto map_by_engine = {table_per_type_and_method>()..., + table_per_type_and_method>()..., + table_per_type_and_method>()..., + table_per_type_and_method>()...}; assert(map_by_engine.size() == _no_of_engines); std::uint16_t engine_id = 0; diff --git a/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp b/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp index e5735e16975c..af515ced15bf 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp @@ -111,6 +111,7 @@ class BaseBuilder { // TODO: remove void print() { + std::cout << "vector size = " << std::to_string(EngineT::vec_size) << std::endl; std::cout << "list_of_seeds: "; for (auto &val: seeds) { std::cout << std::to_string(val) << ", "; diff --git a/dpnp/backend/extensions/rng/device/gaussian.cpp b/dpnp/backend/extensions/rng/device/gaussian.cpp index 0dc81f75a369..0d7529b2077f 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.cpp +++ b/dpnp/backend/extensions/rng/device/gaussian.cpp @@ -26,6 +26,7 @@ #include // dpctl tensor headers +#include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" #include "kernels/alignment.hpp" @@ -51,7 +52,22 @@ using dpctl_krn_ns::disabled_sg_loadstore_wrapper_krn; using dpctl_krn_ns::is_aligned; using dpctl_krn_ns::required_alignment; -constexpr int no_of_methods = 2; // number of methods of gaussian distribution +constexpr auto no_of_methods = 2; // number of methods of gaussian distribution + +constexpr auto seq_of_vec_sizes = std::integer_sequence{}; +constexpr auto vec_sizes_len = seq_of_vec_sizes.size(); +constexpr auto no_of_engines = engine::no_of_engines * vec_sizes_len; + +template +inline auto find_vec_size_impl(const VecSizeT vec_size, std::index_sequence) { + return std::min({ ((Ints == vec_size) ? Indices : sizeof...(Indices))... }); +} + +template +int find_vec_size(const VecSizeT vec_size, std::integer_sequence) { + auto res = find_vec_size_impl(vec_size, std::make_index_sequence{}); + return (res == sizeof...(Ints)) ? -1 : res; +} template struct DistributorBuilder @@ -83,7 +99,7 @@ typedef sycl::event (*gaussian_impl_fn_ptr_t)(engine::EngineBase *engine, char *, const std::vector &); -static gaussian_impl_fn_ptr_t gaussian_dispatch_table[engine::no_of_engines][dpctl_td_ns::num_types][no_of_methods]; +static gaussian_impl_fn_ptr_t gaussian_dispatch_table[no_of_engines][dpctl_td_ns::num_types][no_of_methods]; template class gaussian_kernel; @@ -117,7 +133,7 @@ static sycl::event gaussian_impl(engine::EngineBase *engine, using EngineBuilderT = engine::builder::Builder; EngineBuilderT eng_builder(engine); - eng_builder.print(); // TODO: remove + // eng_builder.print(); // TODO: remove using DistributorBuilderT = DistributorBuilder; DistributorBuilderT dist_builder(mean, stddev); @@ -154,6 +170,7 @@ static sycl::event gaussian_impl(engine::EngineBase *engine, std::pair gaussian(engine::EngineBase *engine, const std::uint8_t method_id, + const std::uint8_t vec_size, const double mean, const double stddev, const std::uint64_t n, @@ -176,15 +193,10 @@ std::pair gaussian(engine::EngineBase *engine, } // ensure that output is ample enough to accommodate all elements - auto res_offsets = res.get_minmax_offsets(); - // destination must be ample enough to accommodate all elements - { - size_t range = - static_cast(res_offsets.second - res_offsets.first); - if (range + 1 < res_nelems) { - throw py::value_error( - "Destination array can not accommodate all the elements of source array."); - } + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(res, res_nelems); + + if (!dpctl::utils::queues_are_compatible(exec_q, {res})) { + throw py::value_error("Execution queue is not compatible with the allocation queue"); } bool is_res_c_contig = res.is_c_contiguous(); @@ -201,6 +213,12 @@ std::pair gaussian(engine::EngineBase *engine, throw std::runtime_error("Unknown method=" + std::to_string(method_id) + " for gaussian distribution."); } + int vec_size_id = find_vec_size(vec_size, seq_of_vec_sizes); + if (vec_size_id < 0) { + throw std::runtime_error("Vector size=" + std::to_string(vec_size) + " is out of supported range"); + } + enginge_id = enginge_id * vec_sizes_len + vec_size_id; + auto array_types = dpctl_td_ns::usm_ndarray_types(); int res_type_id = array_types.typenum_to_lookup_id(res.get_typenum()); @@ -232,7 +250,7 @@ struct GaussianContigFactory void init_gaussian_dispatch_3d_table(void) { - dispatch::Dispatch3DTableBuilder contig; - contig.populate(gaussian_dispatch_table); + dispatch::Dispatch3DTableBuilder contig; + contig.populate(gaussian_dispatch_table, seq_of_vec_sizes); } } // dpnp::backend::ext::rng::device diff --git a/dpnp/backend/extensions/rng/device/gaussian.hpp b/dpnp/backend/extensions/rng/device/gaussian.hpp index 581222c96a78..7b8b36cd98b3 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.hpp +++ b/dpnp/backend/extensions/rng/device/gaussian.hpp @@ -34,6 +34,7 @@ namespace dpnp::backend::ext::rng::device { extern std::pair gaussian(engine::EngineBase *engine, const std::uint8_t method_id, + const std::uint8_t vec_size, const double mean, const double stddev, const std::uint64_t n, diff --git a/dpnp/backend/extensions/rng/device/rng_py.cpp b/dpnp/backend/extensions/rng/device/rng_py.cpp index 9cbb3edba5b4..354061597b63 100644 --- a/dpnp/backend/extensions/rng/device/rng_py.cpp +++ b/dpnp/backend/extensions/rng/device/rng_py.cpp @@ -100,7 +100,8 @@ PYBIND11_MODULE(_rng_dev_impl, m) m.def("_gaussian", &rng_dev_ext::gaussian, "", py::arg("engine"), - py::arg("method"), py::arg("mean"), py::arg("stddev"), + py::arg("method_id"), py::arg("vec_size"), + py::arg("mean"), py::arg("stddev"), py::arg("n"), py::arg("res"), py::arg("depends") = py::list()); } From 1074670c01a986f9ec77e19aa9565856c6722bb1 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 25 Mar 2024 15:28:51 +0100 Subject: [PATCH 14/16] Applied pre-commit formatting rules --- dpnp/CMakeLists.txt | 1 - dpnp/backend/extensions/rng/CMakeLists.txt | 74 ---------- .../extensions/rng/device/common_impl.hpp | 46 +++++-- .../extensions/rng/device/dispatch/matrix.hpp | 28 ++-- .../rng/device/dispatch/table_builder.hpp | 55 ++++---- .../rng/device/engine/base_engine.hpp | 88 ++++++++---- .../device/engine/builder/base_builder.hpp | 64 +++++---- .../rng/device/engine/builder/builder.hpp | 11 +- .../rng/device/engine/builder/mcg31m1.hpp | 15 +- .../rng/device/engine/builder/mcg59.hpp | 15 +- .../rng/device/engine/builder/mrg32k3a.hpp | 15 +- .../device/engine/builder/philox4x32x10.hpp | 15 +- .../rng/device/engine/mcg31m1_engine.hpp | 23 ++-- .../rng/device/engine/mcg59_engine.hpp | 23 ++-- .../rng/device/engine/mrg32k3a_engine.hpp | 39 ++++-- .../device/engine/philox4x32x10_engine.hpp | 39 ++++-- .../extensions/rng/device/gaussian.cpp | 130 ++++++++++++------ .../extensions/rng/device/gaussian.hpp | 18 +-- dpnp/backend/extensions/rng/device/rng_py.cpp | 88 +++++++----- dpnp/backend/extensions/rng/distr_proxy.hpp | 115 ---------------- dpnp/backend/extensions/rng/engine_proxy.hpp | 85 ------------ dpnp/backend/extensions/rng/rng_py.cpp | 89 ------------ 22 files changed, 460 insertions(+), 616 deletions(-) delete mode 100644 dpnp/backend/extensions/rng/CMakeLists.txt delete mode 100644 dpnp/backend/extensions/rng/distr_proxy.hpp delete mode 100644 dpnp/backend/extensions/rng/engine_proxy.hpp delete mode 100644 dpnp/backend/extensions/rng/rng_py.cpp diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt index 2e531b5fafcf..b4bdf13abbd1 100644 --- a/dpnp/CMakeLists.txt +++ b/dpnp/CMakeLists.txt @@ -58,7 +58,6 @@ build_dpnp_cython_ext_with_backend(dparray ${CMAKE_CURRENT_SOURCE_DIR}/dparray.p add_subdirectory(backend) add_subdirectory(backend/extensions/blas) add_subdirectory(backend/extensions/lapack) -add_subdirectory(backend/extensions/rng) add_subdirectory(backend/extensions/rng/device) add_subdirectory(backend/extensions/vm) add_subdirectory(backend/extensions/sycl_ext) diff --git a/dpnp/backend/extensions/rng/CMakeLists.txt b/dpnp/backend/extensions/rng/CMakeLists.txt deleted file mode 100644 index a3821e96dcc7..000000000000 --- a/dpnp/backend/extensions/rng/CMakeLists.txt +++ /dev/null @@ -1,74 +0,0 @@ -# ***************************************************************************** -# Copyright (c) 2023, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -# ***************************************************************************** - - -set(python_module_name _rng_impl) -pybind11_add_module(${python_module_name} MODULE - rng_py.cpp -) - -if (WIN32) - if (${CMAKE_VERSION} VERSION_LESS "3.27") - # this is a work-around for target_link_options inserting option after -link option, cause - # linker to ignore it. - set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel") - endif() -endif() - -set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) - -target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) -target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) - -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) - -if (WIN32) - target_compile_options(${python_module_name} PRIVATE - /clang:-fno-approx-func - /clang:-fno-finite-math-only - ) -else() - target_compile_options(${python_module_name} PRIVATE - -fno-approx-func - -fno-finite-math-only - ) -endif() - -target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) -if (UNIX) - # this option is support on Linux only - target_link_options(${python_module_name} PUBLIC -fsycl-link-huge-device-code) -endif() - -if (DPNP_GENERATE_COVERAGE) - target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) -endif() - -target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP) - -install(TARGETS ${python_module_name} - DESTINATION "dpnp/backend/extensions/rng" -) diff --git a/dpnp/backend/extensions/rng/device/common_impl.hpp b/dpnp/backend/extensions/rng/device/common_impl.hpp index 405d70820232..e7e32baedb26 100644 --- a/dpnp/backend/extensions/rng/device/common_impl.hpp +++ b/dpnp/backend/extensions/rng/device/common_impl.hpp @@ -27,8 +27,8 @@ #include -#include #include +#include namespace dpnp { @@ -57,11 +57,14 @@ struct RngContigFunctor EngineBuilderT engine_; DistributorBuilderT distr_; - DataT * const res_ = nullptr; + DataT *const res_ = nullptr; const std::size_t nelems_; public: - RngContigFunctor(EngineBuilderT& engine, DistributorBuilderT& distr, DataT *res, const std::size_t n_elems) + RngContigFunctor(EngineBuilderT &engine, + DistributorBuilderT &distr, + DataT *res, + const std::size_t n_elems) : engine_(engine), distr_(distr), res_(res), nelems_(n_elems) { } @@ -82,21 +85,33 @@ struct RngContigFunctor DistrT distr = distr_(); if constexpr (enable_sg_load) { - const std::size_t base = vi_per_wi * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); + const std::size_t base = + vi_per_wi * (nd_it.get_group(0) * nd_it.get_local_range(0) + + sg.get_group_id()[0] * max_sg_size); - if ((sg_size == max_sg_size) && (base + vi_per_wi * sg_size < nelems_)) { + if ((sg_size == max_sg_size) && + (base + vi_per_wi * sg_size < nelems_)) { #pragma unroll for (std::uint16_t it = 0; it < vi_per_wi; it += vec_sz) { - std::size_t offset = base + static_cast(it) * static_cast(sg_size); - auto out_multi_ptr = sycl::address_space_cast(&res_[offset]); - - sycl::vec rng_val_vec = mkl_rng_dev::generate(distr, engine); + std::size_t offset = + base + static_cast(it) * + static_cast(sg_size); + auto out_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&res_[offset]); + + sycl::vec rng_val_vec = + mkl_rng_dev::generate(distr, engine); sg.store(out_multi_ptr, rng_val_vec); } } else { - for (std::size_t offset = base + sg.get_local_id()[0]; offset < nelems_; offset += sg_size) { - res_[offset] = mkl_rng_dev::generate_single(distr, engine); + for (std::size_t offset = base + sg.get_local_id()[0]; + offset < nelems_; offset += sg_size) + { + res_[offset] = + mkl_rng_dev::generate_single(distr, + engine); } } } @@ -104,9 +119,12 @@ struct RngContigFunctor std::size_t base = nd_it.get_global_linear_id(); base = (base / sg_size) * sg_size * vi_per_wi + (base % sg_size); - for (std::size_t offset = base; offset < std::min(nelems_, base + sg_size * vi_per_wi); offset += sg_size) + for (std::size_t offset = base; + offset < std::min(nelems_, base + sg_size * vi_per_wi); + offset += sg_size) { - res_[offset] = mkl_rng_dev::generate_single(distr, engine); + res_[offset] = mkl_rng_dev::generate_single( + distr, engine); } } } @@ -116,4 +134,4 @@ struct RngContigFunctor } // namespace rng } // namespace ext } // namespace backend -} // namespace dpnp \ No newline at end of file +} // namespace dpnp diff --git a/dpnp/backend/extensions/rng/device/dispatch/matrix.hpp b/dpnp/backend/extensions/rng/device/dispatch/matrix.hpp index d2e5b42fbc25..5b1f87ed66c6 100644 --- a/dpnp/backend/extensions/rng/device/dispatch/matrix.hpp +++ b/dpnp/backend/extensions/rng/device/dispatch/matrix.hpp @@ -29,15 +29,15 @@ #include "utils/type_dispatch.hpp" - namespace dpnp::backend::ext::rng::device::dispatch { namespace dpctl_td_ns = dpctl::tensor::type_dispatch; namespace mkl_rng_dev = oneapi::mkl::rng::device; template -struct TypePairDefinedEntry : std::bool_constant && - std::is_same_v> +struct TypePairDefinedEntry + : std::bool_constant && + std::is_same_v> { static constexpr bool is_defined = true; }; @@ -46,11 +46,23 @@ template struct GaussianTypePairSupportFactory { static constexpr bool is_defined = std::disjunction< - TypePairDefinedEntry, - TypePairDefinedEntry, - TypePairDefinedEntry, - TypePairDefinedEntry, + TypePairDefinedEntry, + TypePairDefinedEntry, + TypePairDefinedEntry, + TypePairDefinedEntry, // fall-through dpctl_td_ns::NotDefinedEntry>::is_defined; }; -} // dpnp::backend::ext::rng::device::dispatch +} // namespace dpnp::backend::ext::rng::device::dispatch diff --git a/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp b/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp index 5c3b932a2afa..e84f17534949 100644 --- a/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp +++ b/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp @@ -27,13 +27,13 @@ #include - namespace dpnp::backend::ext::rng::device::dispatch { namespace mkl_rng_dev = oneapi::mkl::rng::device; template typename factory, + template + typename factory, int _no_of_engines, int _no_of_types, int _no_of_methods> @@ -44,8 +44,10 @@ class Dispatch3DTableBuilder const std::vector row_per_method() const { std::vector per_method = { - factory{}.get(), - factory{}.get(), + factory{} + .get(), + factory{} + .get(), }; assert(per_method.size() == _no_of_methods); return per_method; @@ -54,21 +56,21 @@ class Dispatch3DTableBuilder template auto table_per_type_and_method() const { - std::vector> - table_by_type = {row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method>(), - row_per_method>()}; + std::vector> table_by_type = { + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method>(), + row_per_method>()}; assert(table_by_type.size() == _no_of_types); return table_by_type; } @@ -78,12 +80,15 @@ class Dispatch3DTableBuilder ~Dispatch3DTableBuilder() = default; template - void populate(funcPtrT table[][_no_of_types][_no_of_methods], std::integer_sequence) const + void populate(funcPtrT table[][_no_of_types][_no_of_methods], + std::integer_sequence) const { - const auto map_by_engine = {table_per_type_and_method>()..., - table_per_type_and_method>()..., - table_per_type_and_method>()..., - table_per_type_and_method>()...}; + const auto map_by_engine = { + table_per_type_and_method>()..., + table_per_type_and_method< + mkl_rng_dev::philox4x32x10>()..., + table_per_type_and_method>()..., + table_per_type_and_method>()...}; assert(map_by_engine.size() == _no_of_engines); std::uint16_t engine_id = 0; @@ -101,4 +106,4 @@ class Dispatch3DTableBuilder } } }; -} // dpnp::backend::ext::rng::device::dispatch +} // namespace dpnp::backend::ext::rng::device::dispatch diff --git a/dpnp/backend/extensions/rng/device/engine/base_engine.hpp b/dpnp/backend/extensions/rng/device/engine/base_engine.hpp index a14a52ebe46e..d6f49595c06b 100644 --- a/dpnp/backend/extensions/rng/device/engine/base_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/base_engine.hpp @@ -27,12 +27,13 @@ #include - namespace dpnp::backend::ext::rng::device::engine { -class EngineType { +class EngineType +{ public: - enum Type : std::uint8_t { + enum Type : std::uint8_t + { MRG32k3a = 0, PHILOX4x32x10, MCG31M1, @@ -43,28 +44,32 @@ class EngineType { EngineType() = default; constexpr EngineType(Type type) : type_(type) {} - constexpr std::uint8_t id() const { + constexpr std::uint8_t id() const + { return static_cast(type_); } - static constexpr std::uint8_t base_id() { + static constexpr std::uint8_t base_id() + { return EngineType(Base).id(); } private: - Type type_; + Type type_; }; // A total number of supported engines == EngineType::Base constexpr std::uint8_t no_of_engines = EngineType::base_id(); -class EngineBase { +class EngineBase +{ private: sycl::queue q_{}; std::vector seed_vec{}; std::vector offset_vec{}; - void validate_vec_size(const std::size_t size) { + void validate_vec_size(const std::size_t size) + { if (size > max_vec_n) { throw std::runtime_error("TODO: add text"); } @@ -73,33 +78,52 @@ class EngineBase { public: EngineBase() {} - EngineBase(sycl::queue &q, std::uint64_t seed, std::uint64_t offset) : - q_(q), seed_vec(1, seed), offset_vec(1, offset) {} + EngineBase(sycl::queue &q, std::uint64_t seed, std::uint64_t offset) + : q_(q), seed_vec(1, seed), offset_vec(1, offset) + { + } - EngineBase(sycl::queue &q, std::vector &seeds, std::uint64_t offset) : - q_(q), seed_vec(seeds), offset_vec(1, offset) { - validate_vec_size(seeds.size()); - } + EngineBase(sycl::queue &q, + std::vector &seeds, + std::uint64_t offset) + : q_(q), seed_vec(seeds), offset_vec(1, offset) + { + validate_vec_size(seeds.size()); + } - EngineBase(sycl::queue &q, std::vector &seeds, std::uint64_t offset) : q_(q), offset_vec(1, offset) { + EngineBase(sycl::queue &q, + std::vector &seeds, + std::uint64_t offset) + : q_(q), offset_vec(1, offset) + { validate_vec_size(seeds.size()); seed_vec.reserve(seeds.size()); seed_vec.assign(seeds.begin(), seeds.end()); } - EngineBase(sycl::queue &q, std::uint64_t seed, std::vector &offsets) : - q_(q), seed_vec(1, seed), offset_vec(offsets) { - validate_vec_size(offsets.size()); - } + EngineBase(sycl::queue &q, + std::uint64_t seed, + std::vector &offsets) + : q_(q), seed_vec(1, seed), offset_vec(offsets) + { + validate_vec_size(offsets.size()); + } - EngineBase(sycl::queue &q, std::vector &seeds, std::vector &offsets) : - q_(q), seed_vec(seeds), offset_vec(offsets) { - validate_vec_size(seeds.size()); - validate_vec_size(offsets.size()); - } + EngineBase(sycl::queue &q, + std::vector &seeds, + std::vector &offsets) + : q_(q), seed_vec(seeds), offset_vec(offsets) + { + validate_vec_size(seeds.size()); + validate_vec_size(offsets.size()); + } - EngineBase(sycl::queue &q, std::vector &seeds, std::vector &offsets) : q_(q), offset_vec(offsets) { + EngineBase(sycl::queue &q, + std::vector &seeds, + std::vector &offsets) + : q_(q), offset_vec(offsets) + { validate_vec_size(seeds.size()); validate_vec_size(offsets.size()); @@ -109,23 +133,27 @@ class EngineBase { virtual ~EngineBase() {} - virtual EngineType get_type() const noexcept { + virtual EngineType get_type() const noexcept + { return EngineType::Base; } - sycl::queue &get_queue() noexcept { + sycl::queue &get_queue() noexcept + { return q_; } - std::vector& get_seeds() noexcept { + std::vector &get_seeds() noexcept + { return seed_vec; } - std::vector& get_offsets() noexcept { + std::vector &get_offsets() noexcept + { return offset_vec; } // static constexpr std::uint8_t max_vec_n = 1; }; -} // dpnp::backend::ext::rng::device::engine +} // namespace dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp b/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp index af515ced15bf..b6f0fea3ffd4 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/base_builder.hpp @@ -31,11 +31,11 @@ #include namespace mkl_rng_dev = oneapi::mkl::rng::device; - namespace dpnp::backend::ext::rng::device::engine::builder { template -class BaseBuilder { +class BaseBuilder +{ private: static constexpr std::uint8_t max_n = EngineBase::max_vec_n; @@ -74,18 +74,21 @@ class BaseBuilder { inline auto operator()(void) const { switch (no_of_seeds) { - case 1: { - if constexpr (std::is_same_v>) { - // issue with mcg59<>() constructor which breaks compilation - return EngineT(seeds[0], offsets[0]); - } - else { - return EngineT({seeds[0]}, offsets[0]); - } + case 1: + { + if constexpr (std::is_same_v>) + { + // issue with mcg59<>() constructor which breaks compilation + return EngineT(seeds[0], offsets[0]); } - // TODO: implement full switch - default: - break; + else { + return EngineT({seeds[0]}, offsets[0]); + } + } + // TODO: implement full switch + default: + break; } return EngineT(); } @@ -93,30 +96,35 @@ class BaseBuilder { inline auto operator()(const OffsetT offset) const { switch (no_of_seeds) { - case 1: { - if constexpr (std::is_same_v>) { - // issue with mcg59<>() constructor which breaks compilation - return EngineT(seeds[0], offsets[0] + offset); - } - else { - return EngineT({seeds[0]}, {offsets[0] + offset}); - } + case 1: + { + if constexpr (std::is_same_v>) + { + // issue with mcg59<>() constructor which breaks compilation + return EngineT(seeds[0], offsets[0] + offset); } - // TODO: implement full switch - default: - break; + else { + return EngineT({seeds[0]}, {offsets[0] + offset}); + } + } + // TODO: implement full switch + default: + break; } return EngineT(); } // TODO: remove - void print() { - std::cout << "vector size = " << std::to_string(EngineT::vec_size) << std::endl; + void print() + { + std::cout << "vector size = " << std::to_string(EngineT::vec_size) + << std::endl; std::cout << "list_of_seeds: "; - for (auto &val: seeds) { + for (auto &val : seeds) { std::cout << std::to_string(val) << ", "; } std::cout << std::endl; } }; -} // dpnp::backend::ext::rng::device::engine::builder +} // namespace dpnp::backend::ext::rng::device::engine::builder diff --git a/dpnp/backend/extensions/rng/device/engine/builder/builder.hpp b/dpnp/backend/extensions/rng/device/engine/builder/builder.hpp index d1bc4643ddc7..1fd9c9c9e89b 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/builder.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/builder.hpp @@ -25,14 +25,15 @@ #pragma once - namespace dpnp::backend::ext::rng::device::engine::builder { template -class Builder {}; -} // dpnp::backend::ext::rng::device::engine::builder +class Builder +{ +}; +} // namespace dpnp::backend::ext::rng::device::engine::builder -#include "mrg32k3a.hpp" -#include "philox4x32x10.hpp" #include "mcg31m1.hpp" #include "mcg59.hpp" +#include "mrg32k3a.hpp" +#include "philox4x32x10.hpp" diff --git a/dpnp/backend/extensions/rng/device/engine/builder/mcg31m1.hpp b/dpnp/backend/extensions/rng/device/engine/builder/mcg31m1.hpp index 8e99afde372e..b94b93b35b38 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/mcg31m1.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/mcg31m1.hpp @@ -27,18 +27,25 @@ #include -#include "base_engine.hpp" #include "base_builder.hpp" +#include "base_engine.hpp" namespace dpnp::backend::ext::rng::device::engine::builder { namespace mkl_rng_dev = oneapi::mkl::rng::device; template -class Builder> : public BaseBuilder, std::uint32_t, std::uint64_t> { +class Builder> + : public BaseBuilder, + std::uint32_t, + std::uint64_t> +{ public: using EngineType = mkl_rng_dev::mcg31m1; - Builder(EngineBase *engine) : BaseBuilder(engine) {} + Builder(EngineBase *engine) + : BaseBuilder(engine) + { + } }; -} // dpnp::backend::ext::rng::device::engine::builder +} // namespace dpnp::backend::ext::rng::device::engine::builder diff --git a/dpnp/backend/extensions/rng/device/engine/builder/mcg59.hpp b/dpnp/backend/extensions/rng/device/engine/builder/mcg59.hpp index d4bc754d8bde..7f8e003b87cf 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/mcg59.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/mcg59.hpp @@ -27,18 +27,25 @@ #include -#include "base_engine.hpp" #include "base_builder.hpp" +#include "base_engine.hpp" namespace dpnp::backend::ext::rng::device::engine::builder { namespace mkl_rng_dev = oneapi::mkl::rng::device; template -class Builder> : public BaseBuilder, std::uint32_t, std::uint64_t> { +class Builder> + : public BaseBuilder, + std::uint32_t, + std::uint64_t> +{ public: using EngineType = mkl_rng_dev::mcg59; - Builder(EngineBase *engine) : BaseBuilder(engine) {} + Builder(EngineBase *engine) + : BaseBuilder(engine) + { + } }; -} // dpnp::backend::ext::rng::device::engine::builder +} // namespace dpnp::backend::ext::rng::device::engine::builder diff --git a/dpnp/backend/extensions/rng/device/engine/builder/mrg32k3a.hpp b/dpnp/backend/extensions/rng/device/engine/builder/mrg32k3a.hpp index 940df64f3a02..a5ab3470b03a 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/mrg32k3a.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/mrg32k3a.hpp @@ -27,18 +27,25 @@ #include -#include "base_engine.hpp" #include "base_builder.hpp" +#include "base_engine.hpp" namespace dpnp::backend::ext::rng::device::engine::builder { namespace mkl_rng_dev = oneapi::mkl::rng::device; template -class Builder> : public BaseBuilder, std::uint32_t, std::uint64_t> { +class Builder> + : public BaseBuilder, + std::uint32_t, + std::uint64_t> +{ public: using EngineType = mkl_rng_dev::mrg32k3a; - Builder(EngineBase *engine) : BaseBuilder(engine) {} + Builder(EngineBase *engine) + : BaseBuilder(engine) + { + } }; -} // dpnp::backend::ext::rng::device::engine::builder +} // namespace dpnp::backend::ext::rng::device::engine::builder diff --git a/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp b/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp index b0abc5c86b86..932c5ee00d76 100644 --- a/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp +++ b/dpnp/backend/extensions/rng/device/engine/builder/philox4x32x10.hpp @@ -27,18 +27,25 @@ #include -#include "base_engine.hpp" #include "base_builder.hpp" +#include "base_engine.hpp" namespace dpnp::backend::ext::rng::device::engine::builder { namespace mkl_rng_dev = oneapi::mkl::rng::device; template -class Builder> : public BaseBuilder, std::uint64_t, std::uint64_t> { +class Builder> + : public BaseBuilder, + std::uint64_t, + std::uint64_t> +{ public: using EngineType = mkl_rng_dev::philox4x32x10; - Builder(EngineBase *engine) : BaseBuilder(engine) {} + Builder(EngineBase *engine) + : BaseBuilder(engine) + { + } }; -} // dpnp::backend::ext::rng::device::engine::builder +} // namespace dpnp::backend::ext::rng::device::engine::builder diff --git a/dpnp/backend/extensions/rng/device/engine/mcg31m1_engine.hpp b/dpnp/backend/extensions/rng/device/engine/mcg31m1_engine.hpp index 2a8cdee310c7..4022bd33c18a 100644 --- a/dpnp/backend/extensions/rng/device/engine/mcg31m1_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/mcg31m1_engine.hpp @@ -27,19 +27,26 @@ #include "base_engine.hpp" - namespace dpnp::backend::ext::rng::device::engine { -class MCG31M1 : public EngineBase { +class MCG31M1 : public EngineBase +{ public: - MCG31M1(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : - EngineBase(q, seed, offset) {} + MCG31M1(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) + : EngineBase(q, seed, offset) + { + } - MCG31M1(sycl::queue &q, std::vector &seeds, std::uint64_t offset = 0) : - EngineBase(q, seeds, offset) {} + MCG31M1(sycl::queue &q, + std::vector &seeds, + std::uint64_t offset = 0) + : EngineBase(q, seeds, offset) + { + } - virtual EngineType get_type() const noexcept override { + virtual EngineType get_type() const noexcept override + { return EngineType::MCG31M1; } }; -} // dpnp::backend::ext::rng::device::engine +} // namespace dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp b/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp index 313a351426b6..e0fbf1741a3f 100644 --- a/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/mcg59_engine.hpp @@ -27,19 +27,26 @@ #include "base_engine.hpp" - namespace dpnp::backend::ext::rng::device::engine { -class MCG59 : public EngineBase { +class MCG59 : public EngineBase +{ public: - MCG59(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : - EngineBase(q, seed, offset) {} + MCG59(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) + : EngineBase(q, seed, offset) + { + } - MCG59(sycl::queue &q, std::vector &seeds, std::uint64_t offset = 0) : - EngineBase(q, seeds, offset) {} + MCG59(sycl::queue &q, + std::vector &seeds, + std::uint64_t offset = 0) + : EngineBase(q, seeds, offset) + { + } - virtual EngineType get_type() const noexcept override { + virtual EngineType get_type() const noexcept override + { return EngineType::MCG59; } }; -} // dpnp::backend::ext::rng::device::engine +} // namespace dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp b/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp index a0fcad167afb..d6657d6d8d8f 100644 --- a/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/mrg32k3a_engine.hpp @@ -27,25 +27,40 @@ #include "base_engine.hpp" - namespace dpnp::backend::ext::rng::device::engine { -class MRG32k3a : public EngineBase { +class MRG32k3a : public EngineBase +{ public: - MRG32k3a(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) : - EngineBase(q, seed, offset) {} + MRG32k3a(sycl::queue &q, std::uint32_t seed, std::uint64_t offset = 0) + : EngineBase(q, seed, offset) + { + } - MRG32k3a(sycl::queue &q, std::vector &seeds, std::uint64_t offset = 0) : - EngineBase(q, seeds, offset) {} + MRG32k3a(sycl::queue &q, + std::vector &seeds, + std::uint64_t offset = 0) + : EngineBase(q, seeds, offset) + { + } - MRG32k3a(sycl::queue &q, std::uint32_t seed, std::vector &offsets) : - EngineBase(q, seed, offsets) {} + MRG32k3a(sycl::queue &q, + std::uint32_t seed, + std::vector &offsets) + : EngineBase(q, seed, offsets) + { + } - MRG32k3a(sycl::queue &q, std::vector &seeds, std::vector &offsets) : - EngineBase(q, seeds, offsets) {} + MRG32k3a(sycl::queue &q, + std::vector &seeds, + std::vector &offsets) + : EngineBase(q, seeds, offsets) + { + } - virtual EngineType get_type() const noexcept override { + virtual EngineType get_type() const noexcept override + { return EngineType::MRG32k3a; } }; -} // dpnp::backend::ext::rng::device::engine +} // namespace dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/engine/philox4x32x10_engine.hpp b/dpnp/backend/extensions/rng/device/engine/philox4x32x10_engine.hpp index bba8a5c63eb2..bef1e7d2119b 100644 --- a/dpnp/backend/extensions/rng/device/engine/philox4x32x10_engine.hpp +++ b/dpnp/backend/extensions/rng/device/engine/philox4x32x10_engine.hpp @@ -27,25 +27,40 @@ #include "base_engine.hpp" - namespace dpnp::backend::ext::rng::device::engine { -class PHILOX4x32x10 : public EngineBase { +class PHILOX4x32x10 : public EngineBase +{ public: - PHILOX4x32x10(sycl::queue &q, std::uint64_t seed, std::uint64_t offset = 0) : - EngineBase(q, seed, offset) {} + PHILOX4x32x10(sycl::queue &q, std::uint64_t seed, std::uint64_t offset = 0) + : EngineBase(q, seed, offset) + { + } - PHILOX4x32x10(sycl::queue &q, std::vector &seeds, std::uint64_t offset = 0) : - EngineBase(q, seeds, offset) {} + PHILOX4x32x10(sycl::queue &q, + std::vector &seeds, + std::uint64_t offset = 0) + : EngineBase(q, seeds, offset) + { + } - PHILOX4x32x10(sycl::queue &q, std::uint64_t seed, std::vector &offsets) : - EngineBase(q, seed, offsets) {} + PHILOX4x32x10(sycl::queue &q, + std::uint64_t seed, + std::vector &offsets) + : EngineBase(q, seed, offsets) + { + } - PHILOX4x32x10(sycl::queue &q, std::vector &seeds, std::vector &offsets) : - EngineBase(q, seeds, offsets) {} + PHILOX4x32x10(sycl::queue &q, + std::vector &seeds, + std::vector &offsets) + : EngineBase(q, seeds, offsets) + { + } - virtual EngineType get_type() const noexcept override { + virtual EngineType get_type() const noexcept override + { return EngineType::PHILOX4x32x10; } }; -} // dpnp::backend::ext::rng::device::engine +} // namespace dpnp::backend::ext::rng::device::engine diff --git a/dpnp/backend/extensions/rng/device/gaussian.cpp b/dpnp/backend/extensions/rng/device/gaussian.cpp index 0d7529b2077f..74866c628aa0 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.cpp +++ b/dpnp/backend/extensions/rng/device/gaussian.cpp @@ -26,20 +26,19 @@ #include // dpctl tensor headers +#include "kernels/alignment.hpp" #include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" -#include "kernels/alignment.hpp" -#include "gaussian.hpp" #include "common_impl.hpp" +#include "gaussian.hpp" #include "engine/builder/builder.hpp" #include "dispatch/matrix.hpp" #include "dispatch/table_builder.hpp" - namespace dpnp::backend::ext::rng::device { namespace dpctl_krn_ns = dpctl::tensor::kernels::alignment_utils; @@ -54,18 +53,24 @@ using dpctl_krn_ns::required_alignment; constexpr auto no_of_methods = 2; // number of methods of gaussian distribution -constexpr auto seq_of_vec_sizes = std::integer_sequence{}; +constexpr auto seq_of_vec_sizes = + std::integer_sequence{}; constexpr auto vec_sizes_len = seq_of_vec_sizes.size(); constexpr auto no_of_engines = engine::no_of_engines * vec_sizes_len; -template -inline auto find_vec_size_impl(const VecSizeT vec_size, std::index_sequence) { - return std::min({ ((Ints == vec_size) ? Indices : sizeof...(Indices))... }); +template +inline auto find_vec_size_impl(const VecSizeT vec_size, + std::index_sequence) +{ + return std::min({((Ints == vec_size) ? Indices : sizeof...(Indices))...}); } -template -int find_vec_size(const VecSizeT vec_size, std::integer_sequence) { - auto res = find_vec_size_impl(vec_size, std::make_index_sequence{}); +template +int find_vec_size(const VecSizeT vec_size, + std::integer_sequence) +{ + auto res = find_vec_size_impl( + vec_size, std::make_index_sequence{}); return (res == sizeof...(Ints)) ? -1 : res; } @@ -99,9 +104,14 @@ typedef sycl::event (*gaussian_impl_fn_ptr_t)(engine::EngineBase *engine, char *, const std::vector &); -static gaussian_impl_fn_ptr_t gaussian_dispatch_table[no_of_engines][dpctl_td_ns::num_types][no_of_methods]; +static gaussian_impl_fn_ptr_t gaussian_dispatch_table[no_of_engines] + [dpctl_td_ns::num_types] + [no_of_methods]; -template +template class gaussian_kernel; template @@ -123,7 +133,8 @@ static sycl::event gaussian_impl(engine::EngineBase *engine, constexpr std::size_t items_per_wi = 4; constexpr std::size_t local_size = 256; const std::size_t wg_items = local_size * vec_sz * items_per_wi; - const std::size_t global_size = ((n + wg_items - 1) / (wg_items)) * local_size; + const std::size_t global_size = + ((n + wg_items - 1) / (wg_items)) * local_size; sycl::event distr_event; @@ -140,42 +151,57 @@ static sycl::event gaussian_impl(engine::EngineBase *engine, if (is_aligned(out_ptr)) { constexpr bool enable_sg_load = true; - using KernelName = gaussian_kernel; - - cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), - details::RngContigFunctor(eng_builder, dist_builder, out, n)); + using KernelName = + gaussian_kernel; + + cgh.parallel_for( + sycl::nd_range<1>({global_size}, {local_size}), + details::RngContigFunctor( + eng_builder, dist_builder, out, n)); } else { constexpr bool disable_sg_load = false; - using InnerKernelName = gaussian_kernel; - using KernelName = disabled_sg_loadstore_wrapper_krn; - - cgh.parallel_for(sycl::nd_range<1>({global_size}, {local_size}), - details::RngContigFunctor(eng_builder, dist_builder, out, n)); + using InnerKernelName = + gaussian_kernel; + using KernelName = + disabled_sg_loadstore_wrapper_krn; + + cgh.parallel_for( + sycl::nd_range<1>({global_size}, {local_size}), + details::RngContigFunctor( + eng_builder, dist_builder, out, n)); } }); } catch (oneapi::mkl::exception const &e) { std::stringstream error_msg; - error_msg << "Unexpected MKL exception caught during gaussian call:\nreason: " << e.what(); + error_msg + << "Unexpected MKL exception caught during gaussian call:\nreason: " + << e.what(); throw std::runtime_error(error_msg.str()); } catch (sycl::exception const &e) { std::stringstream error_msg; - error_msg << "Unexpected SYCL exception caught during gaussian call:\n" << e.what(); + error_msg << "Unexpected SYCL exception caught during gaussian call:\n" + << e.what(); throw std::runtime_error(error_msg.str()); } return distr_event; } -std::pair gaussian(engine::EngineBase *engine, - const std::uint8_t method_id, - const std::uint8_t vec_size, - const double mean, - const double stddev, - const std::uint64_t n, - dpctl::tensor::usm_ndarray res, - const std::vector &depends) +std::pair + gaussian(engine::EngineBase *engine, + const std::uint8_t method_id, + const std::uint8_t vec_size, + const double mean, + const double stddev, + const std::uint64_t n, + dpctl::tensor::usm_ndarray res, + const std::vector &depends) { auto &exec_q = engine->get_queue(); @@ -196,42 +222,52 @@ std::pair gaussian(engine::EngineBase *engine, dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(res, res_nelems); if (!dpctl::utils::queues_are_compatible(exec_q, {res})) { - throw py::value_error("Execution queue is not compatible with the allocation queue"); + throw py::value_error( + "Execution queue is not compatible with the allocation queue"); } bool is_res_c_contig = res.is_c_contiguous(); if (!is_res_c_contig) { - throw std::runtime_error("Only population of contiguous array is supported."); + throw std::runtime_error( + "Only population of contiguous array is supported."); } auto enginge_id = engine->get_type().id(); if (enginge_id >= engine::no_of_engines) { - throw std::runtime_error("Unknown engine type=" + std::to_string(enginge_id) + " for gaussian distribution."); + throw std::runtime_error( + "Unknown engine type=" + std::to_string(enginge_id) + + " for gaussian distribution."); } if (method_id >= no_of_methods) { - throw std::runtime_error("Unknown method=" + std::to_string(method_id) + " for gaussian distribution."); + throw std::runtime_error("Unknown method=" + std::to_string(method_id) + + " for gaussian distribution."); } int vec_size_id = find_vec_size(vec_size, seq_of_vec_sizes); if (vec_size_id < 0) { - throw std::runtime_error("Vector size=" + std::to_string(vec_size) + " is out of supported range"); + throw std::runtime_error("Vector size=" + std::to_string(vec_size) + + " is out of supported range"); } enginge_id = enginge_id * vec_sizes_len + vec_size_id; auto array_types = dpctl_td_ns::usm_ndarray_types(); int res_type_id = array_types.typenum_to_lookup_id(res.get_typenum()); - auto gaussian_fn = gaussian_dispatch_table[enginge_id][res_type_id][method_id]; + auto gaussian_fn = + gaussian_dispatch_table[enginge_id][res_type_id][method_id]; if (gaussian_fn == nullptr) { - throw py::value_error("No gaussian implementation defined for a required type"); + throw py::value_error( + "No gaussian implementation defined for a required type"); } char *res_data = res.get_data(); - sycl::event gaussian_ev = gaussian_fn(engine, mean, stddev, n, res_data, depends); + sycl::event gaussian_ev = + gaussian_fn(engine, mean, stddev, n, res_data, depends); - sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {res}, {gaussian_ev}); - return std::make_pair(ht_ev, gaussian_ev); + sycl::event ht_ev = + dpctl::utils::keep_args_alive(exec_q, {res}, {gaussian_ev}); + return std::make_pair(ht_ev, gaussian_ev); } template @@ -239,7 +275,8 @@ struct GaussianContigFactory { fnT get() { - if constexpr (dispatch::GaussianTypePairSupportFactory::is_defined) { + if constexpr (dispatch::GaussianTypePairSupportFactory::is_defined) { return gaussian_impl; } else { @@ -250,7 +287,10 @@ struct GaussianContigFactory void init_gaussian_dispatch_3d_table(void) { - dispatch::Dispatch3DTableBuilder contig; + dispatch::Dispatch3DTableBuilder + contig; contig.populate(gaussian_dispatch_table, seq_of_vec_sizes); } -} // dpnp::backend::ext::rng::device +} // namespace dpnp::backend::ext::rng::device diff --git a/dpnp/backend/extensions/rng/device/gaussian.hpp b/dpnp/backend/extensions/rng/device/gaussian.hpp index 7b8b36cd98b3..00973a5d4e58 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.hpp +++ b/dpnp/backend/extensions/rng/device/gaussian.hpp @@ -29,17 +29,17 @@ #include "engine/base_engine.hpp" - namespace dpnp::backend::ext::rng::device { -extern std::pair gaussian(engine::EngineBase *engine, - const std::uint8_t method_id, - const std::uint8_t vec_size, - const double mean, - const double stddev, - const std::uint64_t n, - dpctl::tensor::usm_ndarray res, - const std::vector &depends = {}); +extern std::pair + gaussian(engine::EngineBase *engine, + const std::uint8_t method_id, + const std::uint8_t vec_size, + const double mean, + const double stddev, + const std::uint64_t n, + dpctl::tensor::usm_ndarray res, + const std::vector &depends = {}); extern void init_gaussian_dispatch_3d_table(void); } // namespace dpnp::backend::ext::rng::device diff --git a/dpnp/backend/extensions/rng/device/rng_py.cpp b/dpnp/backend/extensions/rng/device/rng_py.cpp index 354061597b63..27cbdb80d8d7 100644 --- a/dpnp/backend/extensions/rng/device/rng_py.cpp +++ b/dpnp/backend/extensions/rng/device/rng_py.cpp @@ -35,25 +35,24 @@ #include "gaussian.hpp" -#include "engine/mrg32k3a_engine.hpp" -#include "engine/philox4x32x10_engine.hpp" #include "engine/mcg31m1_engine.hpp" #include "engine/mcg59_engine.hpp" - +#include "engine/mrg32k3a_engine.hpp" +#include "engine/philox4x32x10_engine.hpp" namespace mkl_rng = oneapi::mkl::rng; namespace rng_dev_ext = dpnp::backend::ext::rng::device; namespace rng_dev_engine = dpnp::backend::ext::rng::device::engine; namespace py = pybind11; - // populate dispatch 3-D tables void init_dispatch_3d_tables(void) { rng_dev_ext::init_gaussian_dispatch_3d_table(); } -class PyEngineBase : public rng_dev_engine::EngineBase { +class PyEngineBase : public rng_dev_engine::EngineBase +{ public: // inherit the constructor using EngineBase::EngineBase; @@ -63,45 +62,70 @@ class PyEngineBase : public rng_dev_engine::EngineBase { // PYBIND11_OVERRIDE_PURE( // sycl::queue&, /* Return type */ // EngineBase, /* Parent class */ - // get_queue, /* Name of function in C++ (must match Python name) */ + // get_queue, /* Name of function in C++ (must match Python name) + // */ // ); // } }; - PYBIND11_MODULE(_rng_dev_impl, m) { init_dispatch_3d_tables(); - py::class_(m, "EngineBase") + py::class_( + m, "EngineBase") .def(py::init<>()) .def("get_queue", &rng_dev_engine::EngineBase::get_queue); - py::class_(m, "MRG32k3a") - .def(py::init(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) - .def(py::init &, std::uint64_t>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) - .def(py::init &>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = py::list()) - .def(py::init &, std::vector &>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = py::list()); - - py::class_(m, "PHILOX4x32x10") - .def(py::init(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) - .def(py::init &, std::uint64_t>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) - .def(py::init &>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = py::list()) - .def(py::init &, std::vector &>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = py::list()); - - py::class_(m, "MCG31M1") - .def(py::init(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) - .def(py::init &, std::uint64_t>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0); + py::class_(m, + "MRG32k3a") + .def(py::init(), + py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) + .def(py::init &, + std::uint64_t>(), + py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) + .def(py::init &>(), + py::arg("sycl_queue"), py::arg("seed"), + py::arg("offset") = py::list()) + .def(py::init &, + std::vector &>(), + py::arg("sycl_queue"), py::arg("seed"), + py::arg("offset") = py::list()); + + py::class_( + m, "PHILOX4x32x10") + .def(py::init(), + py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) + .def(py::init &, + std::uint64_t>(), + py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) + .def(py::init &>(), + py::arg("sycl_queue"), py::arg("seed"), + py::arg("offset") = py::list()) + .def(py::init &, + std::vector &>(), + py::arg("sycl_queue"), py::arg("seed"), + py::arg("offset") = py::list()); + + py::class_(m, + "MCG31M1") + .def(py::init(), + py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) + .def(py::init &, + std::uint64_t>(), + py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0); py::class_(m, "MCG59") - .def(py::init(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) - .def(py::init &, std::uint64_t>(), py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0); - - m.def("_gaussian", &rng_dev_ext::gaussian, - "", - py::arg("engine"), - py::arg("method_id"), py::arg("vec_size"), - py::arg("mean"), py::arg("stddev"), - py::arg("n"), py::arg("res"), + .def(py::init(), + py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0) + .def(py::init &, + std::uint64_t>(), + py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = 0); + + m.def("_gaussian", &rng_dev_ext::gaussian, "", py::arg("engine"), + py::arg("method_id"), py::arg("vec_size"), py::arg("mean"), + py::arg("stddev"), py::arg("n"), py::arg("res"), py::arg("depends") = py::list()); } diff --git a/dpnp/backend/extensions/rng/distr_proxy.hpp b/dpnp/backend/extensions/rng/distr_proxy.hpp deleted file mode 100644 index 69c895f0608d..000000000000 --- a/dpnp/backend/extensions/rng/distr_proxy.hpp +++ /dev/null @@ -1,115 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2023, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -#include - -#include - -namespace dpnp -{ -namespace backend -{ -namespace ext -{ -namespace rng -{ -namespace mkl_rng = oneapi::mkl::rng; - -template -class UnifromBase { -public: - using method_type = Method; - using result_type = DataType; - using distr_type = mkl_rng::uniform; - - - UnifromBase(DataType a, DataType b) { - distr = std::make_unique(a, b); - } - - distr_type& get_distr() const { return *distr; } - -private: - std::unique_ptr distr; -} - -class UniformB { -public: - - Uniform(sycl::queue queue, const int type_num, double a, double b): q(queue) { - if () - } - - void generate() { - // create distribution - } - -private: - int typenum; - std::unique_ptr engine; - - - - using method_type = Method; - // using result_type = RealType; - - DistrProxy(sycl::queue queue, std::uint32_t seed): q(queue) { - engine = std::make_unique(queue, seed); - }; - - // template - DistrProxy(sycl::queue queue, std::vector vec_seed): q(queue) { - switch (vec_seed.size()) { - case 1: - engine = std::make_unique(queue, std::initializer_list({vec_seed[0]})); - break; - case 2: - engine = std::make_unique(queue, std::initializer_list({vec_seed[0], vec_seed[1]})); - break; - case 3: - engine = std::make_unique(queue, std::initializer_list({vec_seed[0], vec_seed[1], vec_seed[2]})); - break; - default: - // TODO need to get rid of the limitation for seed vector length - throw std::runtime_error("Too long seed vector"); - } - }; - - ~DistrProxy() = default; - - sycl::queue& get_queue() { return q;} - EngineT& get_engine() const { return *engine;} - -private: - sycl::queue q; - std::unique_ptr engine; - // engine_t engine; - // sycl::queue q; - -}; -} // namespace lapack -} // namespace ext -} // namespace backend -} // namespace rng diff --git a/dpnp/backend/extensions/rng/engine_proxy.hpp b/dpnp/backend/extensions/rng/engine_proxy.hpp deleted file mode 100644 index 89346718feff..000000000000 --- a/dpnp/backend/extensions/rng/engine_proxy.hpp +++ /dev/null @@ -1,85 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2023, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -#include - -namespace dpnp -{ -namespace backend -{ -namespace ext -{ -namespace rng -{ -class EngineBase { -public: - EngineBase(sycl::queue queue) { - q = std::make_unique(queue); - }; - - sycl::queue& get_queue() { return *q; } - -private: - std::unique_ptr q; -}; - - -template -class EngineProxy: public EngineBase { -public: - using engine_t = EngineT; - - EngineProxy(sycl::queue queue, SeedT seed): EngineBase(queue) { - engine = std::make_unique(queue, seed); - }; - - // template - EngineProxy(sycl::queue queue, std::vector vec_seed): EngineBase(queue) { - switch (vec_seed.size()) { - case 1: - engine = std::make_unique(queue, std::initializer_list({vec_seed[0]})); - break; - case 2: - engine = std::make_unique(queue, std::initializer_list({vec_seed[0], vec_seed[1]})); - break; - case 3: - engine = std::make_unique(queue, std::initializer_list({vec_seed[0], vec_seed[1], vec_seed[2]})); - break; - default: - // TODO need to get rid of the limitation for seed vector length - throw std::runtime_error("Too long seed vector"); - } - }; - - // ~EngineProxy() = default; - -private: - std::unique_ptr engine; - -}; -} // namespace lapack -} // namespace ext -} // namespace backend -} // namespace rng diff --git a/dpnp/backend/extensions/rng/rng_py.cpp b/dpnp/backend/extensions/rng/rng_py.cpp deleted file mode 100644 index a360f3c8140e..000000000000 --- a/dpnp/backend/extensions/rng/rng_py.cpp +++ /dev/null @@ -1,89 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2023, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** -// -// This file defines functions of dpnp.backend._rng_impl extensions -// -//***************************************************************************** - -#include -#include - -#include - -#include - -#include "engine_proxy.hpp" -// #include "syevd.hpp" - -namespace mkl_rng = oneapi::mkl::rng; -namespace rng_ext = dpnp::backend::ext::rng; -namespace py = pybind11; - -// populate dispatch vectors -void init_dispatch_vectors(void) -{ - // lapack_ext::init_syevd_dispatch_vector(); -} - -// populate dispatch tables -void init_dispatch_tables(void) -{ - // lapack_ext::init_heevd_dispatch_table(); -} - - -PYBIND11_MODULE(_rng_impl, m) -{ - using engine_base_t = rng_ext::EngineBase; - py::class_ engine_base(m, "EngineBase"); - engine_base.def(py::init()) - .def("get_queue", &engine_base_t::get_queue); - - using mt19937_engine_t = rng_ext::EngineProxy; - py::class_(m, "mt19937", engine_base) - .def(py::init()) - .def(py::init>()); - - using mcg59_engine_t = rng_ext::EngineProxy; - py::class_(m, "mcg59", engine_base) - .def(py::init()); - - // init_dispatch_vectors(); - // init_dispatch_tables(); - - // m.def("_heevd", &lapack_ext::heevd, - // "Call `heevd` from OneMKL LAPACK library to return " - // "the eigenvalues and eigenvectors of a complex Hermitian matrix", - // py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"), - // py::arg("eig_vecs"), py::arg("eig_vals"), - // py::arg("depends") = py::list()); - - // m.def("_syevd", &lapack_ext::syevd, - // "Call `syevd` from OneMKL LAPACK library to return " - // "the eigenvalues and eigenvectors of a real symmetric matrix", - // py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"), - // py::arg("eig_vecs"), py::arg("eig_vals"), - // py::arg("depends") = py::list()); -} From 9d9540e043063b6b9fd58f8a649a083bba86c741 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 27 Mar 2024 12:52:15 +0100 Subject: [PATCH 15/16] Added base f/w for host API as an extension --- dpnp/CMakeLists.txt | 1 + .../extensions/rng/host/CMakeLists.txt | 77 ++++++++ .../extensions/rng/host/dispatch/matrix.hpp | 65 +++++++ .../rng/host/dispatch/table_builder.hpp | 106 ++++++++++ dpnp/backend/extensions/rng/host/gaussian.cpp | 181 ++++++++++++++++++ dpnp/backend/extensions/rng/host/gaussian.hpp | 44 +++++ dpnp/backend/extensions/rng/host/rng_py.cpp | 142 ++++++++++++++ 7 files changed, 616 insertions(+) create mode 100644 dpnp/backend/extensions/rng/host/CMakeLists.txt create mode 100644 dpnp/backend/extensions/rng/host/dispatch/matrix.hpp create mode 100644 dpnp/backend/extensions/rng/host/dispatch/table_builder.hpp create mode 100644 dpnp/backend/extensions/rng/host/gaussian.cpp create mode 100644 dpnp/backend/extensions/rng/host/gaussian.hpp create mode 100644 dpnp/backend/extensions/rng/host/rng_py.cpp diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt index b4bdf13abbd1..9808d57c8256 100644 --- a/dpnp/CMakeLists.txt +++ b/dpnp/CMakeLists.txt @@ -59,6 +59,7 @@ add_subdirectory(backend) add_subdirectory(backend/extensions/blas) add_subdirectory(backend/extensions/lapack) add_subdirectory(backend/extensions/rng/device) +add_subdirectory(backend/extensions/rng/host) add_subdirectory(backend/extensions/vm) add_subdirectory(backend/extensions/sycl_ext) diff --git a/dpnp/backend/extensions/rng/host/CMakeLists.txt b/dpnp/backend/extensions/rng/host/CMakeLists.txt new file mode 100644 index 000000000000..f3db23c2a23e --- /dev/null +++ b/dpnp/backend/extensions/rng/host/CMakeLists.txt @@ -0,0 +1,77 @@ +# ***************************************************************************** +# Copyright (c) 2023, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + + +set(python_module_name _rng_host_impl) +pybind11_add_module(${python_module_name} MODULE + rng_py.cpp + gaussian.cpp +) + +if (WIN32) + if (${CMAKE_VERSION} VERSION_LESS "3.27") + # this is a work-around for target_link_options inserting option after -link option, cause + # linker to ignore it. + set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel") + endif() +endif() + +set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) + +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/engine) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) + +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) + +if (WIN32) + target_compile_options(${python_module_name} PRIVATE + /clang:-fno-approx-func + /clang:-fno-finite-math-only + ) +else() + target_compile_options(${python_module_name} PRIVATE + -fno-approx-func + -fno-finite-math-only + ) +endif() + +target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) +if (UNIX) + # this option is support on Linux only + target_link_options(${python_module_name} PUBLIC -fsycl-link-huge-device-code) +endif() + +if (DPNP_GENERATE_COVERAGE) + target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) +endif() + +target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP) + +install(TARGETS ${python_module_name} + DESTINATION "dpnp/backend/extensions/rng/host" +) diff --git a/dpnp/backend/extensions/rng/host/dispatch/matrix.hpp b/dpnp/backend/extensions/rng/host/dispatch/matrix.hpp new file mode 100644 index 000000000000..eb7aa43450fd --- /dev/null +++ b/dpnp/backend/extensions/rng/host/dispatch/matrix.hpp @@ -0,0 +1,65 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "utils/type_dispatch.hpp" + +namespace dpnp::backend::ext::rng::host::dispatch +{ +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +namespace mkl_rng = oneapi::mkl::rng; + +template +struct TypePairDefinedEntry + : std::bool_constant && + std::is_same_v> +{ + static constexpr bool is_defined = true; +}; + +template +struct GaussianTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + TypePairDefinedEntry, + TypePairDefinedEntry, + TypePairDefinedEntry, + TypePairDefinedEntry, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; +} // namespace dpnp::backend::ext::rng::host::dispatch diff --git a/dpnp/backend/extensions/rng/host/dispatch/table_builder.hpp b/dpnp/backend/extensions/rng/host/dispatch/table_builder.hpp new file mode 100644 index 000000000000..772fa8fc9f3b --- /dev/null +++ b/dpnp/backend/extensions/rng/host/dispatch/table_builder.hpp @@ -0,0 +1,106 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +namespace dpnp::backend::ext::rng::host::dispatch +{ +namespace mkl_rng = oneapi::mkl::rng; + +template + typename factory, + int _no_of_engines, + int _no_of_types, + int _no_of_methods> +class Dispatch3DTableBuilder +{ +private: + template + const std::vector row_per_method() const + { + std::vector per_method = { + factory{} + .get(), + factory{} + .get(), + }; + assert(per_method.size() == _no_of_methods); + return per_method; + } + + template + auto table_per_type_and_method() const + { + std::vector> table_by_type = { + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method>(), + row_per_method>()}; + assert(table_by_type.size() == _no_of_types); + return table_by_type; + } + +public: + Dispatch3DTableBuilder() = default; + ~Dispatch3DTableBuilder() = default; + + void populate(funcPtrT table[][_no_of_types][_no_of_methods]) const + { + const auto map_by_engine = { + table_per_type_and_method(), + table_per_type_and_method(), + table_per_type_and_method(), + table_per_type_and_method()}; + assert(map_by_engine.size() == _no_of_engines); + + std::uint16_t engine_id = 0; + for (auto &table_by_type : map_by_engine) { + std::uint16_t type_id = 0; + for (auto &row_by_method : table_by_type) { + std::uint16_t method_id = 0; + for (auto &fn_ptr : row_by_method) { + table[engine_id][type_id][method_id] = fn_ptr; + ++method_id; + } + ++type_id; + } + ++engine_id; + } + } +}; +} // namespace dpnp::backend::ext::rng::host::dispatch diff --git a/dpnp/backend/extensions/rng/host/gaussian.cpp b/dpnp/backend/extensions/rng/host/gaussian.cpp new file mode 100644 index 000000000000..d6b4a2387023 --- /dev/null +++ b/dpnp/backend/extensions/rng/host/gaussian.cpp @@ -0,0 +1,181 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +#include + +// dpctl tensor headers +#include "utils/output_validation.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +#include "gaussian.hpp" + +#include "dispatch/matrix.hpp" +#include "dispatch/table_builder.hpp" + +namespace dpnp::backend::ext::rng::host +{ +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +namespace dpctl_tu_ns = dpctl::tensor::type_utils; +namespace mkl_rng = oneapi::mkl::rng; +namespace py = pybind11; + +constexpr auto no_of_methods = 2; // number of methods of gaussian distribution +constexpr auto no_of_engines = device::engine::no_of_engines; + +typedef sycl::event (*gaussian_impl_fn_ptr_t)( + device::engine::EngineBase *engine, + const double, + const double, + const std::uint64_t, + char *, + const std::vector &); + +static gaussian_impl_fn_ptr_t gaussian_dispatch_table[no_of_engines] + [dpctl_td_ns::num_types] + [no_of_methods]; + +template +static sycl::event gaussian_impl(device::engine::EngineBase *engine, + const double mean_val, + const double stddev_val, + const std::uint64_t n, + char *out_ptr, + const std::vector &depends) +{ + auto &exec_q = engine->get_queue(); + dpctl_tu_ns::validate_type_for_device(exec_q); + + DataT *out = reinterpret_cast(out_ptr); + DataT mean = static_cast(mean_val); + DataT stddev = static_cast(stddev_val); + + auto seed_values = engine->get_seeds(); + auto no_of_seeds = seed_values.size(); + if (no_of_seeds > 1) { + throw std::runtime_error(""); + } + + mkl_rng::gaussian distribution(mean, stddev); + mkl_rng::mcg59 eng(exec_q, seed_values[0]); + + return mkl_rng::generate(distribution, eng, n, out, depends); +} + +std::pair + gaussian(device::engine::EngineBase *engine, + const std::uint8_t method_id, + const double mean, + const double stddev, + const std::uint64_t n, + dpctl::tensor::usm_ndarray res, + const std::vector &depends) +{ + auto &exec_q = engine->get_queue(); + + const int res_nd = res.get_ndim(); + const py::ssize_t *res_shape = res.get_shape_raw(); + + size_t res_nelems(1); + for (int i = 0; i < res_nd; ++i) { + res_nelems *= static_cast(res_shape[i]); + } + + if (res_nelems == 0) { + // nothing to do + return std::make_pair(sycl::event(), sycl::event()); + } + + // ensure that output is ample enough to accommodate all elements + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(res, res_nelems); + + if (!dpctl::utils::queues_are_compatible(exec_q, {res})) { + throw py::value_error( + "Execution queue is not compatible with the allocation queue"); + } + + bool is_res_c_contig = res.is_c_contiguous(); + if (!is_res_c_contig) { + throw std::runtime_error( + "Only population of contiguous array is supported."); + } + + auto enginge_id = engine->get_type().id(); + if (enginge_id >= device::engine::no_of_engines) { + throw std::runtime_error( + "Unknown engine type=" + std::to_string(enginge_id) + + " for gaussian distribution."); + } + + if (method_id >= no_of_methods) { + throw std::runtime_error("Unknown method=" + std::to_string(method_id) + + " for gaussian distribution."); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int res_type_id = array_types.typenum_to_lookup_id(res.get_typenum()); + + auto gaussian_fn = + gaussian_dispatch_table[enginge_id][res_type_id][method_id]; + if (gaussian_fn == nullptr) { + throw py::value_error( + "No gaussian implementation defined for a required type"); + } + + char *res_data = res.get_data(); + sycl::event gaussian_ev = + gaussian_fn(engine, mean, stddev, n, res_data, depends); + + sycl::event ht_ev = + dpctl::utils::keep_args_alive(exec_q, {res}, {gaussian_ev}); + return std::make_pair(ht_ev, gaussian_ev); +} + +template +struct GaussianContigFactory +{ + fnT get() + { + if constexpr (dispatch::GaussianTypePairSupportFactory::is_defined) { + return gaussian_impl; + } + else { + return nullptr; + } + } +}; + +void init_gaussian_dispatch_3d_table(void) +{ + dispatch::Dispatch3DTableBuilder + contig; + contig.populate(gaussian_dispatch_table); +} +} // namespace dpnp::backend::ext::rng::host diff --git a/dpnp/backend/extensions/rng/host/gaussian.hpp b/dpnp/backend/extensions/rng/host/gaussian.hpp new file mode 100644 index 000000000000..2ebf5f976e2f --- /dev/null +++ b/dpnp/backend/extensions/rng/host/gaussian.hpp @@ -0,0 +1,44 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "../device/engine/base_engine.hpp" + +namespace dpnp::backend::ext::rng::host +{ +extern std::pair + gaussian(device::engine::EngineBase *engine, + const std::uint8_t method_id, + const double mean, + const double stddev, + const std::uint64_t n, + dpctl::tensor::usm_ndarray res, + const std::vector &depends = {}); + +extern void init_gaussian_dispatch_3d_table(void); +} // namespace dpnp::backend::ext::rng::host diff --git a/dpnp/backend/extensions/rng/host/rng_py.cpp b/dpnp/backend/extensions/rng/host/rng_py.cpp new file mode 100644 index 000000000000..d76b07242e7f --- /dev/null +++ b/dpnp/backend/extensions/rng/host/rng_py.cpp @@ -0,0 +1,142 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +// This file defines functions of dpnp.backend._rng_impl extensions +// +//***************************************************************************** + +#include +#include + +#include +#include + +#include "gaussian.hpp" + +// #include "../device/engine/mcg31m1_engine.hpp" +// #include "../device/engine/mcg59_engine.hpp" +// #include "../device/engine/mrg32k3a_engine.hpp" +// #include "../device/engine/philox4x32x10_engine.hpp" + +namespace mkl_rng = oneapi::mkl::rng; +namespace rng_host_ext = dpnp::backend::ext::rng::host; +// namespace rng_dev_engine = dpnp::backend::ext::rng::device::engine; +namespace py = pybind11; + +// populate dispatch 3-D tables +void init_dispatch_3d_tables(void) +{ + rng_host_ext::init_gaussian_dispatch_3d_table(); +} + +// class PyEngineBase : public rng_dev_engine::EngineBase +// { +// public: +// // inherit the constructor +// using EngineBase::EngineBase; + +// // trampoline (need one for each virtual function) +// // sycl::queue &get_queue() { +// // PYBIND11_OVERRIDE_PURE( +// // sycl::queue&, /* Return type */ +// // EngineBase, /* Parent class */ +// // get_queue, /* Name of function in C++ (must match Python +// name) +// // */ +// // ); +// // } +// }; + +PYBIND11_MODULE(_rng_host_impl, m) +{ + init_dispatch_3d_tables(); + + // py::class_( + // m, "EngineBase") + // .def(py::init<>()) + // .def("get_queue", &rng_dev_engine::EngineBase::get_queue); + + // py::class_(m, + // "MRG32k3a") + // .def(py::init(), + // py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = + // 0) + // .def(py::init &, + // std::uint64_t>(), + // py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = + // 0) + // .def(py::init &>(), + // py::arg("sycl_queue"), py::arg("seed"), + // py::arg("offset") = py::list()) + // .def(py::init &, + // std::vector &>(), + // py::arg("sycl_queue"), py::arg("seed"), + // py::arg("offset") = py::list()); + + // py::class_( + // m, "PHILOX4x32x10") + // .def(py::init(), + // py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = + // 0) + // .def(py::init &, + // std::uint64_t>(), + // py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = + // 0) + // .def(py::init &>(), + // py::arg("sycl_queue"), py::arg("seed"), + // py::arg("offset") = py::list()) + // .def(py::init &, + // std::vector &>(), + // py::arg("sycl_queue"), py::arg("seed"), + // py::arg("offset") = py::list()); + + // py::class_(m, + // "MCG31M1") + // .def(py::init(), + // py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = + // 0) + // .def(py::init &, + // std::uint64_t>(), + // py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = + // 0); + + // py::class_(m, + // "MCG59") + // .def(py::init(), + // py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = + // 0) + // .def(py::init &, + // std::uint64_t>(), + // py::arg("sycl_queue"), py::arg("seed"), py::arg("offset") = + // 0); + + m.def("_gaussian", &rng_host_ext::gaussian, "", py::arg("engine"), + py::arg("method_id"), py::arg("mean"), py::arg("stddev"), + py::arg("n"), py::arg("res"), py::arg("depends") = py::list()); +} From 199f13b4f56f54c99a5008b8a2cf063307ad4b2c Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 16 May 2024 18:12:02 +0200 Subject: [PATCH 16/16] Added uniform distribution --- .../extensions/rng/device/CMakeLists.txt | 1 + .../extensions/rng/device/common_impl.hpp | 19 +- .../extensions/rng/device/dispatch/matrix.hpp | 24 +- .../rng/device/dispatch/table_builder.hpp | 48 ++- .../extensions/rng/device/gaussian.cpp | 4 +- .../backend/extensions/rng/device/uniform.cpp | 295 ++++++++++++++++++ .../backend/extensions/rng/device/uniform.hpp | 45 +++ 7 files changed, 387 insertions(+), 49 deletions(-) create mode 100644 dpnp/backend/extensions/rng/device/uniform.cpp create mode 100644 dpnp/backend/extensions/rng/device/uniform.hpp diff --git a/dpnp/backend/extensions/rng/device/CMakeLists.txt b/dpnp/backend/extensions/rng/device/CMakeLists.txt index c8ea8e8c9071..37df343f40b9 100644 --- a/dpnp/backend/extensions/rng/device/CMakeLists.txt +++ b/dpnp/backend/extensions/rng/device/CMakeLists.txt @@ -28,6 +28,7 @@ set(python_module_name _rng_dev_impl) pybind11_add_module(${python_module_name} MODULE rng_py.cpp gaussian.cpp + uniform.cpp ) if (WIN32) diff --git a/dpnp/backend/extensions/rng/device/common_impl.hpp b/dpnp/backend/extensions/rng/device/common_impl.hpp index e7e32baedb26..2b4744c251fd 100644 --- a/dpnp/backend/extensions/rng/device/common_impl.hpp +++ b/dpnp/backend/extensions/rng/device/common_impl.hpp @@ -30,17 +30,7 @@ #include #include -namespace dpnp -{ -namespace backend -{ -namespace ext -{ -namespace rng -{ -namespace device -{ -namespace details +namespace dpnp::backend::ext::rng::device::details { namespace py = pybind11; @@ -129,9 +119,4 @@ struct RngContigFunctor } } }; -} // namespace details -} // namespace device -} // namespace rng -} // namespace ext -} // namespace backend -} // namespace dpnp +} // namespace dpnp::backend::ext::rng::device::details diff --git a/dpnp/backend/extensions/rng/device/dispatch/matrix.hpp b/dpnp/backend/extensions/rng/device/dispatch/matrix.hpp index 5b1f87ed66c6..6cbd42538584 100644 --- a/dpnp/backend/extensions/rng/device/dispatch/matrix.hpp +++ b/dpnp/backend/extensions/rng/device/dispatch/matrix.hpp @@ -49,19 +49,35 @@ struct GaussianTypePairSupportFactory TypePairDefinedEntry, + mkl_rng_dev::gaussian_method::box_muller2>, TypePairDefinedEntry, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + +template +struct UniformTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + TypePairDefinedEntry, + TypePairDefinedEntry, TypePairDefinedEntry, + mkl_rng_dev::uniform_method::standard>, TypePairDefinedEntry, + mkl_rng_dev::uniform_method::accurate>, // fall-through dpctl_td_ns::NotDefinedEntry>::is_defined; }; diff --git a/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp b/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp index e84f17534949..44044b36af24 100644 --- a/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp +++ b/dpnp/backend/extensions/rng/device/dispatch/table_builder.hpp @@ -40,37 +40,34 @@ template + template const std::vector row_per_method() const { std::vector per_method = { - factory{} - .get(), - factory{} - .get(), + factory{}.get()..., }; assert(per_method.size() == _no_of_methods); return per_method; } - template + template auto table_per_type_and_method() const { std::vector> table_by_type = { - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method(), - row_per_method>(), - row_per_method>()}; + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method(), + row_per_method, Methods...>(), + row_per_method, Methods...>()}; assert(table_by_type.size() == _no_of_types); return table_by_type; } @@ -79,16 +76,15 @@ class Dispatch3DTableBuilder Dispatch3DTableBuilder() = default; ~Dispatch3DTableBuilder() = default; - template + template void populate(funcPtrT table[][_no_of_types][_no_of_methods], std::integer_sequence) const { const auto map_by_engine = { - table_per_type_and_method>()..., - table_per_type_and_method< - mkl_rng_dev::philox4x32x10>()..., - table_per_type_and_method>()..., - table_per_type_and_method>()...}; + table_per_type_and_method, Methods...>()..., + table_per_type_and_method, Methods...>()..., + table_per_type_and_method, Methods...>()..., + table_per_type_and_method, Methods...>()...}; assert(map_by_engine.size() == _no_of_engines); std::uint16_t engine_id = 0; diff --git a/dpnp/backend/extensions/rng/device/gaussian.cpp b/dpnp/backend/extensions/rng/device/gaussian.cpp index 74866c628aa0..643c4091fee9 100644 --- a/dpnp/backend/extensions/rng/device/gaussian.cpp +++ b/dpnp/backend/extensions/rng/device/gaussian.cpp @@ -51,7 +51,7 @@ using dpctl_krn_ns::disabled_sg_loadstore_wrapper_krn; using dpctl_krn_ns::is_aligned; using dpctl_krn_ns::required_alignment; -constexpr auto no_of_methods = 2; // number of methods of gaussian distribution +constexpr auto no_of_methods = 1; // number of methods of gaussian distribution constexpr auto seq_of_vec_sizes = std::integer_sequence{}; @@ -291,6 +291,6 @@ void init_gaussian_dispatch_3d_table(void) GaussianContigFactory, no_of_engines, dpctl_td_ns::num_types, no_of_methods> contig; - contig.populate(gaussian_dispatch_table, seq_of_vec_sizes); + contig.populate(gaussian_dispatch_table, seq_of_vec_sizes); } } // namespace dpnp::backend::ext::rng::device diff --git a/dpnp/backend/extensions/rng/device/uniform.cpp b/dpnp/backend/extensions/rng/device/uniform.cpp new file mode 100644 index 000000000000..62ef69a29ea4 --- /dev/null +++ b/dpnp/backend/extensions/rng/device/uniform.cpp @@ -0,0 +1,295 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "kernels/alignment.hpp" +#include "utils/output_validation.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +#include "common_impl.hpp" +#include "uniform.hpp" + +#include "engine/builder/builder.hpp" + +#include "dispatch/matrix.hpp" +#include "dispatch/table_builder.hpp" + +namespace dpnp::backend::ext::rng::device +{ +namespace dpctl_krn_ns = dpctl::tensor::kernels::alignment_utils; +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +namespace dpctl_tu_ns = dpctl::tensor::type_utils; +namespace mkl_rng_dev = oneapi::mkl::rng::device; +namespace py = pybind11; + +using dpctl_krn_ns::disabled_sg_loadstore_wrapper_krn; +using dpctl_krn_ns::is_aligned; +using dpctl_krn_ns::required_alignment; + +constexpr auto no_of_methods = 2; // number of methods of gaussian distribution + +constexpr auto seq_of_vec_sizes = + std::integer_sequence{}; +constexpr auto vec_sizes_len = seq_of_vec_sizes.size(); +constexpr auto no_of_engines = engine::no_of_engines * vec_sizes_len; + +template +inline auto find_vec_size_impl(const VecSizeT vec_size, + std::index_sequence) +{ + return std::min({((Ints == vec_size) ? Indices : sizeof...(Indices))...}); +} + +template +int find_vec_size(const VecSizeT vec_size, + std::integer_sequence) +{ + auto res = find_vec_size_impl( + vec_size, std::make_index_sequence{}); + return (res == sizeof...(Ints)) ? -1 : res; +} + +template +struct DistributorBuilder +{ +private: + const DataT mean_; + const DataT stddev_; + +public: + using result_type = DataT; + using method_type = Method; + using distr_type = typename mkl_rng_dev::uniform; + + DistributorBuilder(const DataT mean, const DataT stddev) + : mean_(mean), stddev_(stddev) + { + } + + inline auto operator()(void) const + { + return distr_type(mean_, stddev_); + } +}; + +typedef sycl::event (*uniform_impl_fn_ptr_t)(engine::EngineBase *engine, + const double, + const double, + const std::uint64_t, + char *, + const std::vector &); + +static uniform_impl_fn_ptr_t uniform_dispatch_table[no_of_engines] + [dpctl_td_ns::num_types] + [no_of_methods]; + +template +class uniform_kernel; + +template +static sycl::event uniform_impl(engine::EngineBase *engine, + const double a_val, + const double b_val, + const std::uint64_t n, + char *out_ptr, + const std::vector &depends) +{ + auto &exec_q = engine->get_queue(); + dpctl_tu_ns::validate_type_for_device(exec_q); + + DataT *out = reinterpret_cast(out_ptr); + DataT a = static_cast(a_val); + DataT b = static_cast(b_val); + + constexpr std::size_t vec_sz = EngineT::vec_size; + constexpr std::size_t items_per_wi = 4; + constexpr std::size_t local_size = 256; + const std::size_t wg_items = local_size * vec_sz * items_per_wi; + const std::size_t global_size = + ((n + wg_items - 1) / (wg_items)) * local_size; + + sycl::event distr_event; + + try { + distr_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + using EngineBuilderT = engine::builder::Builder; + EngineBuilderT eng_builder(engine); + // eng_builder.print(); // TODO: remove + + using DistributorBuilderT = DistributorBuilder; + DistributorBuilderT dist_builder(a, b); + + if (is_aligned(out_ptr)) { + constexpr bool enable_sg_load = true; + using KernelName = + uniform_kernel; + + cgh.parallel_for( + sycl::nd_range<1>({global_size}, {local_size}), + details::RngContigFunctor( + eng_builder, dist_builder, out, n)); + } + else { + constexpr bool disable_sg_load = false; + using InnerKernelName = + uniform_kernel; + using KernelName = + disabled_sg_loadstore_wrapper_krn; + + cgh.parallel_for( + sycl::nd_range<1>({global_size}, {local_size}), + details::RngContigFunctor( + eng_builder, dist_builder, out, n)); + } + }); + } catch (oneapi::mkl::exception const &e) { + std::stringstream error_msg; + + error_msg + << "Unexpected MKL exception caught during gaussian call:\nreason: " + << e.what(); + throw std::runtime_error(error_msg.str()); + } catch (sycl::exception const &e) { + std::stringstream error_msg; + + error_msg << "Unexpected SYCL exception caught during gaussian call:\n" + << e.what(); + throw std::runtime_error(error_msg.str()); + } + return distr_event; +} + +std::pair + uniform(engine::EngineBase *engine, + const std::uint8_t method_id, + const std::uint8_t vec_size, + const double a, + const double b, + const std::uint64_t n, + dpctl::tensor::usm_ndarray res, + const std::vector &depends) +{ + auto &exec_q = engine->get_queue(); + + const int res_nd = res.get_ndim(); + const py::ssize_t *res_shape = res.get_shape_raw(); + + size_t res_nelems(1); + for (int i = 0; i < res_nd; ++i) { + res_nelems *= static_cast(res_shape[i]); + } + + if (res_nelems == 0) { + // nothing to do + return std::make_pair(sycl::event(), sycl::event()); + } + + // ensure that output is ample enough to accommodate all elements + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(res, res_nelems); + + if (!dpctl::utils::queues_are_compatible(exec_q, {res})) { + throw py::value_error( + "Execution queue is not compatible with the allocation queue"); + } + + bool is_res_c_contig = res.is_c_contiguous(); + if (!is_res_c_contig) { + throw std::runtime_error( + "Only population of contiguous array is supported."); + } + + auto enginge_id = engine->get_type().id(); + if (enginge_id >= engine::no_of_engines) { + throw std::runtime_error( + "Unknown engine type=" + std::to_string(enginge_id) + + " for gaussian distribution."); + } + + if (method_id >= no_of_methods) { + throw std::runtime_error("Unknown method=" + std::to_string(method_id) + + " for gaussian distribution."); + } + + int vec_size_id = find_vec_size(vec_size, seq_of_vec_sizes); + if (vec_size_id < 0) { + throw std::runtime_error("Vector size=" + std::to_string(vec_size) + + " is out of supported range"); + } + enginge_id = enginge_id * vec_sizes_len + vec_size_id; + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int res_type_id = array_types.typenum_to_lookup_id(res.get_typenum()); + + auto uniform_fn = + uniform_dispatch_table[enginge_id][res_type_id][method_id]; + if (uniform_fn == nullptr) { + throw py::value_error( + "No gaussian implementation defined for a required type"); + } + + char *res_data = res.get_data(); + sycl::event uniform_ev = + uniform_fn(engine, a, b, n, res_data, depends); + + sycl::event ht_ev = + dpctl::utils::keep_args_alive(exec_q, {res}, {uniform_ev}); + return std::make_pair(ht_ev, uniform_ev); +} + +template +struct UniformContigFactory +{ + fnT get() + { + if constexpr (dispatch::UniformTypePairSupportFactory::is_defined) { + return uniform_impl; + } + else { + return nullptr; + } + } +}; + +void init_uniform_dispatch_3d_table(void) +{ + dispatch::Dispatch3DTableBuilder + contig; + contig.populate(uniform_dispatch_table, seq_of_vec_sizes); +} +} // namespace dpnp::backend::ext::rng::device diff --git a/dpnp/backend/extensions/rng/device/uniform.hpp b/dpnp/backend/extensions/rng/device/uniform.hpp new file mode 100644 index 000000000000..e45bc45f31da --- /dev/null +++ b/dpnp/backend/extensions/rng/device/uniform.hpp @@ -0,0 +1,45 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "engine/base_engine.hpp" + +namespace dpnp::backend::ext::rng::device +{ +extern std::pair + uniform(engine::EngineBase *engine, + const std::uint8_t method_id, + const std::uint8_t vec_size, + const double a, + const double b, + const std::uint64_t n, + dpctl::tensor::usm_ndarray res, + const std::vector &depends = {}); + +extern void init_uniform_dispatch_3d_table(void); +} // namespace dpnp::backend::ext::rng::device