Skip to content

Commit

Permalink
SYCL] Fixed-size groups and partitions are renamed to "chunks"
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreiZibrov committed Nov 24, 2024
1 parent 4abd474 commit fb1d33d
Show file tree
Hide file tree
Showing 11 changed files with 45 additions and 38 deletions.
8 changes: 4 additions & 4 deletions llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def AspectExt_oneapi_bindless_images_sample_1d_usm : Aspect<"ext_oneapi_bindless
def AspectExt_oneapi_bindless_images_sample_2d_usm : Aspect<"ext_oneapi_bindless_images_sample_2d_usm">;
def AspectExt_intel_esimd : Aspect<"ext_intel_esimd">;
def AspectExt_oneapi_ballot_group : Aspect<"ext_oneapi_ballot_group">;
def AspectExt_oneapi_fixed_size_group : Aspect<"ext_oneapi_fixed_size_group">;
def AspectExt_oneapi_chunk : Aspect<"ext_oneapi_chunk">;
def AspectExt_oneapi_opportunistic_group : Aspect<"ext_oneapi_opportunistic_group">;
def AspectExt_oneapi_tangle_group : Aspect<"ext_oneapi_tangle_group">;
def AspectExt_intel_matrix : Aspect<"ext_intel_matrix">;
Expand Down Expand Up @@ -144,7 +144,7 @@ def : TargetInfo<"__TestAspectList",
AspectExt_oneapi_bindless_sampled_image_fetch_2d_usm, AspectExt_oneapi_bindless_sampled_image_fetch_2d,
AspectExt_oneapi_bindless_sampled_image_fetch_3d,
AspectExt_intel_esimd,
AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group,
AspectExt_oneapi_ballot_group, AspectExt_oneapi_chunk, AspectExt_oneapi_opportunistic_group,
AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component,
AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph, AspectExt_oneapi_private_alloca,
AspectExt_oneapi_queue_profiling_tag, AspectExt_oneapi_virtual_mem, AspectExt_oneapi_cuda_cluster_group,
Expand All @@ -163,7 +163,7 @@ defvar IntelCpuAspects = [
AspectCpu, AspectFp16, AspectFp64, AspectQueue_profiling, AspectAtomic64,
AspectExt_oneapi_srgb, AspectExt_oneapi_native_assert,
AspectExt_intel_legacy_image, AspectExt_oneapi_ballot_group,
AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group,
AspectExt_oneapi_chunk, AspectExt_oneapi_opportunistic_group,
AspectExt_oneapi_tangle_group, AspectExt_oneapi_private_alloca
] # AllUSMAspects;

Expand Down Expand Up @@ -231,7 +231,7 @@ class CudaTargetInfo<string targetName, list<Aspect> aspectList, int subGroupSiz
defvar CudaMinAspects = !listconcat(AllUSMAspects, [AspectGpu, AspectFp64, AspectOnline_compiler, AspectOnline_linker,
AspectQueue_profiling, AspectExt_intel_pci_address, AspectExt_intel_max_mem_bandwidth, AspectExt_intel_memory_bus_width,
AspectExt_intel_device_info_uuid, AspectExt_oneapi_native_assert, AspectExt_intel_free_memory, AspectExt_intel_device_id,
AspectExt_intel_memory_clock_rate, AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group,
AspectExt_intel_memory_clock_rate, AspectExt_oneapi_ballot_group, AspectExt_oneapi_chunk,
AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]);
// Bindless images aspects are partially supported on CUDA and disabled by default at the moment.
defvar CudaBindlessImagesAspects = [AspectExt_oneapi_bindless_images, AspectExt_oneapi_bindless_images_shared_usm,
Expand Down
12 changes: 6 additions & 6 deletions sycl/include/sycl/detail/spirv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ struct is_ballot_group<
sycl::ext::oneapi::experimental::ballot_group<ParentGroup>>
: std::true_type {};

template <typename Group> struct is_fixed_size_group : std::false_type {};
template <typename Group> struct is_chunk : std::false_type {};

template <size_t ChunkSize, typename ParentGroup>
struct is_chunk<sycl::ext::oneapi::experimental::chunk<
Expand Down Expand Up @@ -888,7 +888,7 @@ inline uint32_t MapShuffleID(GroupT g, id<1> local_id) {
if constexpr (is_tangle_or_opportunistic_group<GroupT>::value ||
is_ballot_group<GroupT>::value)
return detail::IdToMaskPosition(g, local_id);
else if constexpr (is_fixed_size_group<GroupT>::value)
else if constexpr (is_chunk<GroupT>::value)
return g.get_group_linear_id() * g.get_local_range().size() + local_id;
else
return local_id.get(0);
Expand Down Expand Up @@ -983,7 +983,7 @@ EnableIfNativeShuffle<T> ShuffleXor(GroupT g, T x, id<1> mask) {
if constexpr (ext::oneapi::experimental::is_user_constructed_group_v<
GroupT>) {
auto MemberMask = detail::ExtractMask(detail::GetMask(g))[0];
if constexpr (is_fixed_size_group_v<GroupT>) {
if constexpr (is_chunk_v<GroupT>) {
return cuda_shfl_sync_bfly_i32(MemberMask, x,
static_cast<uint32_t>(mask.get(0)), 0x1f);

Expand Down Expand Up @@ -1031,7 +1031,7 @@ EnableIfNativeShuffle<T> ShuffleDown(GroupT g, T x, uint32_t delta) {
if constexpr (ext::oneapi::experimental::is_user_constructed_group_v<
GroupT>) {
auto MemberMask = detail::ExtractMask(detail::GetMask(g))[0];
if constexpr (is_fixed_size_group_v<GroupT>) {
if constexpr (is_chunk_v<GroupT>) {
return cuda_shfl_sync_down_i32(MemberMask, x, delta, 31);
} else {
unsigned localSetBit = g.get_local_id()[0] + 1;
Expand Down Expand Up @@ -1075,7 +1075,7 @@ EnableIfNativeShuffle<T> ShuffleUp(GroupT g, T x, uint32_t delta) {
if constexpr (ext::oneapi::experimental::is_user_constructed_group_v<
GroupT>) {
auto MemberMask = detail::ExtractMask(detail::GetMask(g))[0];
if constexpr (is_fixed_size_group_v<GroupT>) {
if constexpr (is_chunk_v<GroupT>) {
return cuda_shfl_sync_up_i32(MemberMask, x, delta, 0);
} else {
unsigned localSetBit = g.get_local_id()[0] + 1;
Expand Down Expand Up @@ -1301,7 +1301,7 @@ ControlBarrier(Group g, memory_scope FenceScope, memory_order Order) {
template <__spv::GroupOperation Op, size_t ChunkSize, \
typename ParentGroup, typename T> \
inline T Group##Instruction( \
ext::oneapi::experimental::chunk<ChunkSize, ParentGroup> g, T x) \
ext::oneapi::experimental::chunk<ChunkSize, ParentGroup> g, T x) { \
using ConvertedT = detail::ConvertToOpenCLType_t<T>; \
\
using OCLT = std::conditional_t< \
Expand Down
4 changes: 2 additions & 2 deletions sycl/include/sycl/detail/type_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
namespace sycl {
inline namespace _V1 {
namespace detail {
template <class T> struct is_fixed_size_group : std::false_type {};
template <class T> struct is_chunk : std::false_type {};

template <class T>
inline constexpr bool is_fixed_size_group_v = is_fixed_size_group<T>::value;
inline constexpr bool is_chunk_v = is_chunk<T>::value;

template <typename VecT, typename OperationLeftT, typename OperationRightT,
template <typename> class OperationCurrentT, int... Indexes>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//==--- fixed_size_group.hpp --- SYCL extension for non-uniform groups -----==//
//==--- chunk.hpp --- SYCL extension for non-uniform groups -----==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
Expand All @@ -10,7 +10,7 @@

#include <sycl/aspects.hpp>
#include <sycl/detail/spirv.hpp>
#include <sycl/detail/type_traits.hpp> // for is_fixed_size_group, is_group
#include <sycl/detail/type_traits.hpp> // for is_chunk, is_group
#include <sycl/exception.hpp>
#include <sycl/ext/oneapi/experimental/non_uniform_groups.hpp>
#include <sycl/ext/oneapi/sub_group_mask.hpp> // for sub_group_mask
Expand All @@ -30,7 +30,7 @@ template <size_t ChunkSize, typename ParentGroup> class chunk;

template <size_t ChunkSize, typename Group>
#ifdef __SYCL_DEVICE_ONLY__
[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_fixed_size_group)]]
[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_chunk)]]
#endif
inline std::enable_if_t<sycl::is_group_v<std::decay_t<Group>> &&
std::is_same_v<Group, sycl::sub_group>,
Expand All @@ -44,7 +44,9 @@ template <size_t ChunkSize, typename ParentGroup> class chunk {
using linear_id_type = typename ParentGroup::linear_id_type;
static constexpr int dimensions = 1;
static constexpr sycl::memory_scope fence_scope = ParentGroup::fence_scope;

/* ToDo:wd
we don't have fragment (operator fragment<ParentGroup>() const;) implementation yet.
*/
id_type get_group_id() const {
#ifdef __SYCL_DEVICE_ONLY__
return __spirv_SubgroupLocalInvocationId() / ChunkSize;
Expand Down Expand Up @@ -132,9 +134,9 @@ template <size_t ChunkSize, typename ParentGroup> class chunk {
#endif

#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
fixed_size_group(ext::oneapi::sub_group_mask mask) : Mask(mask) {}
chunk(ext::oneapi::sub_group_mask mask) : Mask(mask) {}
#else
fixed_size_group() {}
chunk() {}
#endif

friend chunk<ChunkSize, ParentGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ masked_reduction_cuda_sm80(Group g, T x, BinaryOperation binary_op,

//// Shuffle based masked reduction impls

// fixed_size_group group reduction using shfls
// chunk group reduction using shfls
template <typename Group, typename T, class BinaryOperation>
inline __SYCL_ALWAYS_INLINE std::enable_if_t<is_fixed_size_group_v<Group>, T>
inline __SYCL_ALWAYS_INLINE std::enable_if_t<is_chunk_v<Group>, T>
masked_reduction_cuda_shfls(Group g, T x, BinaryOperation binary_op,
const uint32_t MemberMask) {
for (int i = g.get_local_range()[0] / 2; i > 0; i /= 2) {
Expand All @@ -111,7 +111,7 @@ masked_reduction_cuda_shfls(Group g, T x, BinaryOperation binary_op,
template <typename Group, typename T, class BinaryOperation>
inline __SYCL_ALWAYS_INLINE std::enable_if_t<
ext::oneapi::experimental::is_user_constructed_group_v<Group> &&
!is_fixed_size_group_v<Group>,
!is_chunk_v<Group>,
T>
masked_reduction_cuda_shfls(Group g, T x, BinaryOperation binary_op,
const uint32_t MemberMask) {
Expand Down Expand Up @@ -208,10 +208,10 @@ inline __SYCL_ALWAYS_INLINE

//// Shuffle based masked reduction impls

// fixed_size_group group scan using shfls
// chunk group scan using shfls
template <__spv::GroupOperation Op, typename Group, typename T,
class BinaryOperation>
inline __SYCL_ALWAYS_INLINE std::enable_if_t<is_fixed_size_group_v<Group>, T>
inline __SYCL_ALWAYS_INLINE std::enable_if_t<is_chunk_v<Group>, T>
masked_scan_cuda_shfls(Group g, T x, BinaryOperation binary_op,
const uint32_t MemberMask) {
unsigned localIdVal = g.get_local_id()[0];
Expand All @@ -233,7 +233,7 @@ template <__spv::GroupOperation Op, typename Group, typename T,
class BinaryOperation>
inline __SYCL_ALWAYS_INLINE std::enable_if_t<
ext::oneapi::experimental::is_user_constructed_group_v<Group> &&
!is_fixed_size_group_v<Group>,
!is_chunk_v<Group>,
T>
masked_scan_cuda_shfls(Group g, T x, BinaryOperation binary_op,
const uint32_t MemberMask) {
Expand Down
2 changes: 1 addition & 1 deletion sycl/include/sycl/info/aspects.def
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ __SYCL_ASPECT(ext_oneapi_mipmap_anisotropy, 51)
__SYCL_ASPECT(ext_oneapi_mipmap_level_reference, 52)
__SYCL_ASPECT(ext_intel_esimd, 53)
__SYCL_ASPECT(ext_oneapi_ballot_group, 54)
__SYCL_ASPECT(ext_oneapi_fixed_size_group, 55)
__SYCL_ASPECT(ext_oneapi_chunk, 55)
__SYCL_ASPECT(ext_oneapi_opportunistic_group, 56)
__SYCL_ASPECT(ext_oneapi_tangle_group, 57)
__SYCL_ASPECT(ext_intel_matrix, 58)
Expand Down
2 changes: 1 addition & 1 deletion sycl/include/sycl/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@
#include <sycl/ext/oneapi/experimental/composite_device.hpp>
#include <sycl/ext/oneapi/experimental/cuda/barrier.hpp>
#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
#include <sycl/ext/oneapi/experimental/fixed_size_group.hpp>
#include <sycl/ext/oneapi/experimental/chunk.hpp>
#include <sycl/ext/oneapi/experimental/forward_progress.hpp>
#include <sycl/ext/oneapi/experimental/group_load_store.hpp>
#include <sycl/ext/oneapi/experimental/group_sort.hpp>
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/device_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ bool device_impl::has(aspect Aspect) const {
return call_successful && support;
}
case aspect::ext_oneapi_ballot_group:
case aspect::ext_oneapi_fixed_size_group:
case aspect::ext_oneapi_chunk:
case aspect::ext_oneapi_opportunistic_group: {
return (this->getBackend() == backend::ext_oneapi_level_zero) ||
(this->getBackend() == backend::opencl) ||
Expand Down
12 changes: 10 additions & 2 deletions sycl/test-e2e/NonUniformGroups/chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,16 @@
// REQUIRES: cpu || gpu
// UNSUPPORTED: hip
// REQUIRES: sg-32
// REQUIRES: aspect-ext_oneapi_chunk

#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/experimental/fixed_size_group.hpp>
#include <vector>

//#ifdef __SYCL_DEVICE_ONLY__
//[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_chunk)]]

#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/experimental/chunk.hpp>

namespace syclex = sycl::ext::oneapi::experimental;

template <size_t ChunkSize> class TestKernel;
Expand Down Expand Up @@ -71,3 +77,5 @@ int main() {
test<32>();
return 0;
}

//# endif
9 changes: 3 additions & 6 deletions sycl/test-e2e/NonUniformGroups/chunk_algorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,16 @@
//
// REQUIRES: cpu || gpu
// REQUIRES: sg-32
// REQUIRES: aspect-ext_oneapi_fixed_size_group
// REQUIRES: aspect-ext_oneapi_chunk

#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/experimental/fixed_size_group.hpp>
#include <sycl/ext/oneapi/experimental/chunk.hpp>
#include <sycl/group_algorithm.hpp>
#include <sycl/group_barrier.hpp>
#include <vector>
namespace syclex = sycl::ext::oneapi::experimental;





namespace syclex = sycl::ext::oneapi::experimental;

template <size_t ChunkSize> class TestKernel;

Expand Down
6 changes: 3 additions & 3 deletions sycl/test-e2e/NonUniformGroups/is_user_constructed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
// RUN: %{build} -fsyntax-only -o %t.out

#include <sycl/ext/oneapi/experimental/ballot_group.hpp>
#include <sycl/ext/oneapi/experimental/fixed_size_group.hpp>
#include <sycl/ext/oneapi/experimental/chunk.hpp>
#include <sycl/ext/oneapi/experimental/opportunistic_group.hpp>
#include <sycl/ext/oneapi/experimental/tangle_group.hpp>
namespace syclex = sycl::ext::oneapi::experimental;

static_assert(
syclex::is_user_constructed_group_v<syclex::ballot_group<sycl::sub_group>>);
static_assert(syclex::is_user_constructed_group_v<
syclex::fixed_size_group<1, sycl::sub_group>>);
syclex::chunk<1, sycl::sub_group>>);
static_assert(syclex::is_user_constructed_group_v<
syclex::fixed_size_group<2, sycl::sub_group>>);
syclex::chunk<2, sycl::sub_group>>);
static_assert(
syclex::is_user_constructed_group_v<syclex::tangle_group<sycl::sub_group>>);
static_assert(syclex::is_user_constructed_group_v<syclex::opportunistic_group>);

0 comments on commit fb1d33d

Please sign in to comment.