Skip to content

Commit

Permalink
[SYCL][Bindless] Image Array Sub-Region Copy (intel#14954)
Browse files Browse the repository at this point in the history
* Add support for sub-region copies of image arrays.
* Initial implementation of tests.

UR PR: oneapi-src/unified-runtime#1928
  • Loading branch information
isaacault authored Sep 12, 2024
1 parent 404fb8a commit 5b2f1d2
Show file tree
Hide file tree
Showing 5 changed files with 332 additions and 19 deletions.
12 changes: 6 additions & 6 deletions sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit 24a8299efc59c715a1c2dd180692a5e12a12283a
# Merge: eb63d1a2 2fea679d
# commit 2bbe952669861579ea84fa30f14e1ed27ead0692
# Merge: d357964a 6b353545
# Author: Omar Ahmed <[email protected]>
# Date: Wed Sep 11 10:40:59 2024 +0100
# Merge pull request #2078 from callumfare/callum/fix_device_extensions_fpga
# Add workaround for silently supported OpenCL extensions on Intel FPGA
set(UNIFIED_RUNTIME_TAG 24a8299efc59c715a1c2dd180692a5e12a12283a)
# Date: Thu Sep 12 11:36:11 2024 +0100
# Merge pull request #1928 from isaacault/iault/image_array_copy
# [Bindless][Exp] Image Array Sub-Region Copies
set(UNIFIED_RUNTIME_TAG 2bbe952669861579ea84fa30f14e1ed27ead0692)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,7 @@ address mode `clamp_to_edge` will be applied for all dimensions. If the
performed when sampling along the cube face borders.
====

=== Explicit copies
=== Explicit copies [[explicit_copies]]

```cpp
namespace sycl {
Expand Down Expand Up @@ -1398,9 +1398,14 @@ As with allocation, the descriptor must be populated appropriately, i.e.

=== Copying image array data [[copying_image_array_data]]

When copying to or from image arrays, the user should copy to/from the entire
array of images in one call to `ext_oneapi_copy` by passing the image arrays'
`image_mem_handle`.
When copying to or from image arrays, the user should utilize `ext_oneapi_copy`
and pass the image arrays' `image_mem_handle`, and any applicable sub-region
copy parameters, as outlined in <<explicit_copies>>.

In order to copy to specific layers of an image array, the offset and extent
parameters involved in sub-region copies must be populated such that the 3rd
dimension of the ranges represent the arrays' layer(s) being copied, regardless
of whether the copy is performed on a 1D or 2D image array.

=== Reading an image array

Expand Down Expand Up @@ -2888,4 +2893,5 @@ These features still need to be handled:
`map_external_linear_memory`.
|6 |2024-08-05 | - Collated all changes since revision 5.
- Bumped SYCL_EXT_ONEAPI_BINDLESS_IMAGES to number 6.
|6.1|2024-09-09| - Update for image-array sub-region copy support.
|======================
35 changes: 26 additions & 9 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//

#include "ur_api.h"
#include "sycl/detail/helpers.hpp"
#include "ur_api.h"
#include <algorithm>

#include <detail/config.hpp>
Expand Down Expand Up @@ -1046,10 +1046,15 @@ void handler::ext_oneapi_copy(
Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
: UrDesc.type;

// Array size is depth extent.
impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size};
} else {
UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D
: (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D
: UR_MEM_TYPE_IMAGE1D);

impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
}

ur_image_format_t UrFormat;
Expand All @@ -1061,7 +1066,6 @@ void handler::ext_oneapi_copy(

impl->MSrcOffset = {0, 0, 0};
impl->MDestOffset = {0, 0, 0};
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
impl->MSrcImageDesc = UrDesc;
impl->MDstImageDesc = UrDesc;
impl->MSrcImageFormat = UrFormat;
Expand Down Expand Up @@ -1136,7 +1140,7 @@ void handler::ext_oneapi_copy(
sycl_ext_oneapi_bindless_images>();
Desc.verify();

MSrcPtr = reinterpret_cast<void*>(Src.raw_handle);
MSrcPtr = reinterpret_cast<void *>(Src.raw_handle);
MDstPtr = Dest;

ur_image_desc_t UrDesc = {};
Expand All @@ -1156,10 +1160,15 @@ void handler::ext_oneapi_copy(
Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
: UrDesc.type;

// Array size is depth extent.
impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size};
} else {
UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D
: (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D
: UR_MEM_TYPE_IMAGE1D);

impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
}

ur_image_format_t UrFormat;
Expand All @@ -1171,7 +1180,6 @@ void handler::ext_oneapi_copy(

impl->MSrcOffset = {0, 0, 0};
impl->MDestOffset = {0, 0, 0};
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
impl->MSrcImageDesc = UrDesc;
impl->MDstImageDesc = UrDesc;
impl->MSrcImageFormat = UrFormat;
Expand All @@ -1189,8 +1197,8 @@ void handler::ext_oneapi_copy(
sycl_ext_oneapi_bindless_images>();
ImageDesc.verify();

MSrcPtr = reinterpret_cast<void*>(Src.raw_handle);
MDstPtr = reinterpret_cast<void*>(Dest.raw_handle);
MSrcPtr = reinterpret_cast<void *>(Src.raw_handle);
MDstPtr = reinterpret_cast<void *>(Dest.raw_handle);

ur_image_desc_t UrDesc = {};
UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC;
Expand All @@ -1208,11 +1216,17 @@ void handler::ext_oneapi_copy(
ImageDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
: UrDesc.type;

// Array size is depth extent.
impl->MCopyExtent = {ImageDesc.width, ImageDesc.height,
ImageDesc.array_size};
} else {
UrDesc.type = ImageDesc.depth > 0
? UR_MEM_TYPE_IMAGE3D
: (ImageDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D
: UR_MEM_TYPE_IMAGE1D);

impl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth};
}

ur_image_format_t UrFormat;
Expand All @@ -1224,7 +1238,6 @@ void handler::ext_oneapi_copy(

impl->MSrcOffset = {0, 0, 0};
impl->MDestOffset = {0, 0, 0};
impl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth};
impl->MSrcImageDesc = UrDesc;
impl->MDstImageDesc = UrDesc;
impl->MSrcImageFormat = UrFormat;
Expand All @@ -1244,7 +1257,7 @@ void handler::ext_oneapi_copy(
sycl_ext_oneapi_bindless_images>();
SrcImgDesc.verify();

MSrcPtr = reinterpret_cast<void*>(Src.raw_handle);
MSrcPtr = reinterpret_cast<void *>(Src.raw_handle);
MDstPtr = Dest;

ur_image_desc_t UrDesc = {};
Expand Down Expand Up @@ -1320,10 +1333,15 @@ void handler::ext_oneapi_copy(
Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
: UrDesc.type;

// Array size is depth extent.
impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size};
} else {
UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D
: (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D
: UR_MEM_TYPE_IMAGE1D);

impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
}

ur_image_format_t UrFormat;
Expand All @@ -1335,7 +1353,6 @@ void handler::ext_oneapi_copy(

impl->MSrcOffset = {0, 0, 0};
impl->MDestOffset = {0, 0, 0};
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
impl->MSrcImageDesc = UrDesc;
impl->MDstImageDesc = UrDesc;
impl->MSrcImageFormat = UrFormat;
Expand Down
145 changes: 145 additions & 0 deletions sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// REQUIRES: cuda

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include <iostream>
#include <sycl/detail/core.hpp>

#include <sycl/ext/oneapi/bindless_images.hpp>

// Uncomment to print additional test information
// #define VERBOSE_PRINT

class image_addition;

int main() {

sycl::device dev;
sycl::queue q(dev);
auto ctxt = q.get_context();

// declare image data
size_t width = 4;
size_t layers = 2;
size_t N = width * layers;
std::vector<float> out(N);
std::vector<float> expected(N);
std::vector<float> dataIn1(N);
std::vector<float> dataIn2(N);
for (int i = 0; i < width; i++) {
for (int j = 0; j < layers; j++) {
expected[j + ((layers)*i)] = (j + (layers)*i) * 3;
dataIn1[j + ((layers)*i)] = (j + (layers)*i);
dataIn2[j + ((layers)*i)] = (j + (layers)*i) * 2;
}
}

// Image descriptor - can use the same for both images
sycl::ext::oneapi::experimental::image_descriptor desc(
{width}, 1, sycl::image_channel_type::fp32,
sycl::ext::oneapi::experimental::image_type::array, 1, layers);

try {
// Extension: allocate memory on device and create the handle
sycl::ext::oneapi::experimental::image_mem imgMem0(desc, q);
sycl::ext::oneapi::experimental::image_mem imgMem1(desc, q);
sycl::ext::oneapi::experimental::image_mem imgMem2(desc, q);

// Extension: create the image and return the handle
sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 =
sycl::ext::oneapi::experimental::create_image(imgMem0, desc, q);
sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 =
sycl::ext::oneapi::experimental::create_image(imgMem1, desc, q);
sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle3 =
sycl::ext::oneapi::experimental::create_image(imgMem2, desc, q);

// The subregion size for the copies.
sycl::range copyExtent = {width / 2, 1, layers / 2};
// The extent of data provided on the host (vector).
sycl::range srcExtent = {width, 1, layers};

// the 4 subregion offsets used for the copies.
std::vector<sycl::range<3>> offsets{{0, 0, 0},
{width / 2, 0, 0},
{0, 0, layers / 2},
{width / 2, 0, layers / 2}};

for (auto offset : offsets) {
// Extension: Copy to image array subregion.
q.ext_oneapi_copy(dataIn1.data(), offset, srcExtent, imgMem0.get_handle(),
offset, desc, copyExtent);
// Extension: Copy to image array subregion.
q.ext_oneapi_copy(dataIn2.data(), offset, srcExtent, imgMem1.get_handle(),
offset, desc, copyExtent);
}
q.wait_and_throw();

q.submit([&](sycl::handler &cgh) {
cgh.parallel_for<image_addition>(
sycl::nd_range<2>{{width, layers}, {width, layers}},
[=](sycl::nd_item<2> it) {
size_t dim0 = it.get_local_id(0);
size_t dim1 = it.get_local_id(1);
float sum = 0;
// Extension: fetch image data from handle
float px1 =
sycl::ext::oneapi::experimental::fetch_image_array<float>(
imgHandle1, int(dim0), dim1);
float px2 =
sycl::ext::oneapi::experimental::fetch_image_array<float>(
imgHandle2, int(dim0), dim1);

sum = px1 + px2;

// Extension: write to image with handle
sycl::ext::oneapi::experimental::write_image_array<float>(
imgHandle3, int(dim0), dim1, sum);
});
});
q.wait_and_throw();

// Extension: copy data from device to host (four subregions/quadrants)
for (auto offset : offsets) {
q.ext_oneapi_copy(imgMem2.get_handle(), offset, desc, out.data(), offset,
srcExtent, copyExtent);
}
q.wait_and_throw();

// Extension: cleanup
sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, q);
sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, q);
} catch (sycl::exception e) {
std::cerr << "SYCL exception caught! : " << e.what() << "\n";
return 1;
} catch (...) {
std::cerr << "Unknown exception caught!\n";
return 2;
}

// collect and validate output
bool validated = true;
for (int i = 0; i < N; i++) {
bool mismatch = false;
if (out[i] != expected[i]) {
mismatch = true;
validated = false;
}

if (mismatch) {
#ifdef VERBOSE_PRINT
std::cout << "Result mismatch! Expected: " << expected[i]
<< ", Actual: " << out[i] << std::endl;
#else
break;
#endif
}
}
if (validated) {
std::cout << "Test passed!" << std::endl;
return 0;
}

std::cout << "Test failed!" << std::endl;
return 3;
}
Loading

0 comments on commit 5b2f1d2

Please sign in to comment.