Skip to content

Commit

Permalink
[SYCL][Graph] Add support for fill and memset nodes in graphs (#11472)
Browse files Browse the repository at this point in the history
- Adds support for fill and memset nodes in graphs.
- Supported on Level Zero only for now.
- Adds E2E and unit tests for these new node types.
- Minor modifications due to renaming of some UR functions.

---------

Co-authored-by: Maxime France-Pillois <[email protected]>
Co-authored-by: Ewan Crawford <[email protected]>
  • Loading branch information
3 people authored Jan 11, 2024
1 parent 218d9fe commit 8ea0229
Show file tree
Hide file tree
Showing 26 changed files with 524 additions and 4 deletions.
4 changes: 4 additions & 0 deletions sycl/doc/design/CommandGraph.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,14 @@ with the following entry-points:
| `urCommandBufferFinalizeExp` | No more commands can be appended, makes command-buffer ready to enqueue on a command-queue. |
| `urCommandBufferAppendKernelLaunchExp` | Append a kernel execution command to command-buffer. |
| `urCommandBufferAppendUSMMemcpyExp` | Append a USM memcpy command to the command-buffer. |
| `urCommandBufferAppendUSMFillExp` | Append a USM fill command to the command-buffer. |
| `urCommandBufferAppendMemBufferCopyExp` | Append a mem buffer copy command to the command-buffer. |
| `urCommandBufferAppendMemBufferWriteExp` | Append a memory write command to a command-buffer object. |
| `urCommandBufferAppendMemBufferReadExp` | Append a memory read command to a command-buffer object. |
| `urCommandBufferAppendMemBufferCopyRectExp` | Append a rectangular memory copy command to a command-buffer object. |
| `urCommandBufferAppendMemBufferWriteRectExp` | Append a rectangular memory write command to a command-buffer object. |
| `urCommandBufferAppendMemBufferReadRectExp` | Append a rectangular memory read command to a command-buffer object. |
| `urCommandBufferAppendMemBufferFillExp` | Append a memory fill command to a command-buffer object. |
| `urCommandBufferEnqueueExp` | Submit command-buffer to a command-queue for execution. |

See the [UR EXP-COMMAND-BUFFER](https://oneapi-src.github.io/unified-runtime/core/EXP-COMMAND-BUFFER.html)
Expand Down Expand Up @@ -347,6 +349,8 @@ The types of commands which are unsupported, and lead to this exception are:
This corresponds to a memory buffer write command.
* `handler::copy(src, dest)` or `handler::memcpy(dest, src)` - Where both `src` and
`dest` are USM pointers. This corresponds to a USM copy command.
* `handler::memset(ptr, value, numBytes)` - This corresponds to a USM memory
fill command.

Note that `handler::copy(src, dest)` where both `src` and `dest` are an accessor
is supported, as a memory buffer copy command exists in the OpenCL extension.
Expand Down
2 changes: 2 additions & 0 deletions sycl/include/sycl/detail/pi.def
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ _PI_API(piextCommandBufferMemBufferWrite)
_PI_API(piextCommandBufferMemBufferWriteRect)
_PI_API(piextCommandBufferMemBufferRead)
_PI_API(piextCommandBufferMemBufferReadRect)
_PI_API(piextCommandBufferMemBufferFill)
_PI_API(piextCommandBufferFillUSM)
_PI_API(piextEnqueueCommandBuffer)

_PI_API(piextUSMPitchedAlloc)
Expand Down
45 changes: 41 additions & 4 deletions sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,10 @@
// 14.38 Change PI_MEM_ADVICE_* values to flags for use in bitwise operations.
// 14.39 Added PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT device info query.
// 14.40 Add HIP _pi_mem_advice alises to match the PI_MEM_ADVICE_CUDA* ones.
// 14.41 Added piextCommandBufferMemBufferFill & piextCommandBufferFillUSM

#define _PI_H_VERSION_MAJOR 14
#define _PI_H_VERSION_MINOR 40
#define _PI_H_VERSION_MINOR 41

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -2441,7 +2442,7 @@ __SYCL_EXPORT pi_result piextCommandBufferMemBufferReadRect(
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
pi_buff_rect_region region, size_t buffer_row_pitch,
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
void *ptr, pi_uint32 num_events_in_wait_list,
void *ptr, pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

Expand All @@ -2458,7 +2459,7 @@ __SYCL_EXPORT pi_result piextCommandBufferMemBufferReadRect(
/// \param sync_point The sync_point associated with this memory operation.
__SYCL_EXPORT pi_result piextCommandBufferMemBufferWrite(
pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset,
size_t size, const void *ptr, pi_uint32 num_events_in_wait_list,
size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

Expand All @@ -2483,7 +2484,43 @@ __SYCL_EXPORT pi_result piextCommandBufferMemBufferWriteRect(
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
pi_buff_rect_region region, size_t buffer_row_pitch,
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
const void *ptr, pi_uint32 num_events_in_wait_list,
const void *ptr, pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

/// API to append a mem buffer fill command to the command-buffer.
/// \param command_buffer The command-buffer to append onto.
/// \param buffer is the location to fill the data.
/// \param pattern pointer to the pattern to fill the buffer with.
/// \param pattern_size size of the pattern in bytes.
/// \param offset Offset into the buffer to fill from.
/// \param size fill size in bytes.
/// \param num_sync_points_in_wait_list The number of sync points in the
/// provided wait list.
/// \param sync_point_wait_list A list of sync points that this command must
/// wait on.
/// \param sync_point The sync_point associated with this memory operation.
__SYCL_EXPORT pi_result piextCommandBufferMemBufferFill(
pi_ext_command_buffer command_buffer, pi_mem buffer, const void *pattern,
size_t pattern_size, size_t offset, size_t size,
pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

/// API to append a USM fill command to the command-buffer.
/// \param command_buffer The command-buffer to append onto.
/// \param ptr pointer to the USM allocation to fill.
/// \param pattern pointer to the pattern to fill ptr with.
/// \param pattern_size size of the pattern in bytes.
/// \param size fill size in bytes.
/// \param num_sync_points_in_wait_list The number of sync points in the
/// provided wait list.
/// \param sync_point_wait_list A list of sync points that this command must
/// wait on.
/// \param sync_point The sync_point associated with this memory operation.
__SYCL_EXPORT pi_result piextCommandBufferFillUSM(
pi_ext_command_buffer command_buffer, void *ptr, const void *pattern,
size_t pattern_size, size_t size, pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

Expand Down
21 changes: 21 additions & 0 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1137,6 +1137,27 @@ pi_result piextCommandBufferMemBufferWriteRect(
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}

pi_result piextCommandBufferMemBufferFill(
pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern,
size_t PatternSize, size_t Offset, size_t Size,
pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferMemBufferFill(
CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size,
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}

pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer,
void *Ptr, const void *Pattern,
size_t PatternSize, size_t Size,
pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList,
pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferFillUSM(
CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList,
SyncPointWaitList, SyncPoint);
}

pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer,
pi_queue Queue,
pi_uint32 NumEventsInWaitList,
Expand Down
21 changes: 21 additions & 0 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1145,6 +1145,27 @@ pi_result piextCommandBufferMemBufferWriteRect(
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}

pi_result piextCommandBufferMemBufferFill(
pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern,
size_t PatternSize, size_t Offset, size_t Size,
pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferMemBufferFill(
CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size,
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}

pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer,
void *Ptr, const void *Pattern,
size_t PatternSize, size_t Size,
pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList,
pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferFillUSM(
CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList,
SyncPointWaitList, SyncPoint);
}

pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer,
pi_queue Queue,
pi_uint32 NumEventsInWaitList,
Expand Down
21 changes: 21 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1303,6 +1303,27 @@ pi_result piextCommandBufferMemBufferWriteRect(
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}

pi_result piextCommandBufferMemBufferFill(
pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern,
size_t PatternSize, size_t Offset, size_t Size,
pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferMemBufferFill(
CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size,
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}

pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer,
void *Ptr, const void *Pattern,
size_t PatternSize, size_t Size,
pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList,
pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferFillUSM(
CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList,
SyncPointWaitList, SyncPoint);
}

pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer,
pi_queue Queue,
pi_uint32 NumEventsInWaitList,
Expand Down
21 changes: 21 additions & 0 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1081,6 +1081,27 @@ pi_result piextCommandBufferMemBufferWriteRect(
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}

pi_result piextCommandBufferMemBufferFill(
pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern,
size_t PatternSize, size_t Offset, size_t Size,
pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferMemBufferFill(
CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size,
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}

pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer,
void *Ptr, const void *Pattern,
size_t PatternSize, size_t Size,
pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList,
pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferFillUSM(
CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList,
SyncPointWaitList, SyncPoint);
}

pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer,
pi_queue Queue,
pi_uint32 NumEventsInWaitList,
Expand Down
31 changes: 31 additions & 0 deletions sycl/plugins/unified_runtime/pi2ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4580,6 +4580,37 @@ inline pi_result piextCommandBufferMemBufferWrite(
return PI_SUCCESS;
}

inline pi_result piextCommandBufferMemBufferFill(
pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern,
size_t PatternSize, size_t Offset, size_t Size,
pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT);

ur_exp_command_buffer_handle_t UrCommandBuffer =
reinterpret_cast<ur_exp_command_buffer_handle_t>(CommandBuffer);
ur_mem_handle_t UrBuffer = reinterpret_cast<ur_mem_handle_t>(Buffer);

HANDLE_ERRORS(urCommandBufferAppendMemBufferFillExp(
UrCommandBuffer, UrBuffer, Pattern, PatternSize, Offset, Size,
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint));
return PI_SUCCESS;
}

inline pi_result piextCommandBufferFillUSM(
pi_ext_command_buffer CommandBuffer, void *Ptr, const void *Pattern,
size_t PatternSize, size_t Size, pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {

ur_exp_command_buffer_handle_t UrCommandBuffer =
reinterpret_cast<ur_exp_command_buffer_handle_t>(CommandBuffer);

HANDLE_ERRORS(urCommandBufferAppendUSMFillExp(
UrCommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList,
SyncPointWaitList, SyncPoint));
return PI_SUCCESS;
}

inline pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer,
pi_queue Queue,
pi_uint32 NumEventsInWaitList,
Expand Down
21 changes: 21 additions & 0 deletions sycl/plugins/unified_runtime/pi_unified_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,27 @@ pi_result piextCommandBufferMemBufferWriteRect(
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}

pi_result piextCommandBufferMemBufferFill(
pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern,
size_t PatternSize, size_t Offset, size_t Size,
pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferMemBufferFill(
CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size,
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}

pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer,
void *Ptr, const void *Pattern,
size_t PatternSize, size_t Size,
pi_uint32 NumSyncPointsInWaitList,
const pi_ext_sync_point *SyncPointWaitList,
pi_ext_sync_point *SyncPoint) {
return pi2ur::piextCommandBufferFillUSM(
CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList,
SyncPointWaitList, SyncPoint);
}

pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer,
pi_queue Queue,
pi_uint32 NumEventsInWaitList,
Expand Down
44 changes: 44 additions & 0 deletions sycl/source/detail/memory_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1666,6 +1666,50 @@ void MemoryManager::ext_oneapi_copy_usm_cmd_buffer(
}
}

void MemoryManager::ext_oneapi_fill_usm_cmd_buffer(
sycl::detail::ContextImplPtr Context,
sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem,
size_t Len, int Pattern, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {

if (!DstMem)
throw runtime_error("NULL pointer argument in memory fill operation.",
PI_ERROR_INVALID_VALUE);

const PluginPtr &Plugin = Context->getPlugin();
// Pattern is interpreted as an unsigned char so pattern size is always 1.
size_t PatternSize = 1;
Plugin->call<PiApiKind::piextCommandBufferFillUSM>(
CommandBuffer, DstMem, &Pattern, PatternSize, Len, Deps.size(),
Deps.data(), OutSyncPoint);
}

void MemoryManager::ext_oneapi_fill_cmd_buffer(
sycl::detail::ContextImplPtr Context,
sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj,
void *Mem, size_t PatternSize, const char *Pattern, unsigned int Dim,
sycl::range<3> Size, sycl::range<3> AccessRange, sycl::id<3> AccessOffset,
unsigned int ElementSize,
std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) {
assert(SYCLMemObj && "The SYCLMemObj is nullptr");

const PluginPtr &Plugin = Context->getPlugin();
if (SYCLMemObj->getType() != detail::SYCLMemObjI::MemObjType::Buffer) {
throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
"Images are not supported in Graphs");
}
if (Dim <= 1) {
Plugin->call<PiApiKind::piextCommandBufferMemBufferFill>(
CommandBuffer, pi::cast<sycl::detail::pi::PiMem>(Mem), Pattern,
PatternSize, AccessOffset[0] * ElementSize,
AccessRange[0] * ElementSize, Deps.size(), Deps.data(), OutSyncPoint);
return;
}
throw runtime_error("Not supported configuration of fill requested",
PI_ERROR_INVALID_OPERATION);
}

void MemoryManager::copy_image_bindless(
void *Src, QueueImplPtr Queue, void *Dst,
const sycl::detail::pi::PiMemImageDesc &Desc,
Expand Down
18 changes: 18 additions & 0 deletions sycl/source/detail/memory_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,24 @@ class __SYCL_EXPORT MemoryManager {
void *DstMem, std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
sycl::detail::pi::PiExtSyncPoint *OutSyncPoint);

static void ext_oneapi_fill_usm_cmd_buffer(
sycl::detail::ContextImplPtr Context,
sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem,
size_t Len, int Pattern,
std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
sycl::detail::pi::PiExtSyncPoint *OutSyncPoint);

static void
ext_oneapi_fill_cmd_buffer(sycl::detail::ContextImplPtr Context,
sycl::detail::pi::PiExtCommandBuffer CommandBuffer,
SYCLMemObjI *SYCLMemObj, void *Mem,
size_t PatternSize, const char *Pattern,
unsigned int Dim, sycl::range<3> Size,
sycl::range<3> AccessRange,
sycl::id<3> AccessOffset, unsigned int ElementSize,
std::vector<sycl::detail::pi::PiExtSyncPoint> Deps,
sycl::detail::pi::PiExtSyncPoint *OutSyncPoint);

static void
copy_image_bindless(void *Src, QueueImplPtr Queue, void *Dst,
const sycl::detail::pi::PiMemImageDesc &Desc,
Expand Down
22 changes: 22 additions & 0 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2764,6 +2764,28 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() {
MEvent->setSyncPoint(OutSyncPoint);
return PI_SUCCESS;
}
case CG::CGTYPE::Fill: {
CGFill *Fill = (CGFill *)MCommandGroup.get();
Requirement *Req = (Requirement *)(Fill->getReqToFill());
AllocaCommandBase *AllocaCmd = getAllocaForReq(Req);

MemoryManager::ext_oneapi_fill_cmd_buffer(
MQueue->getContextImplPtr(), MCommandBuffer, AllocaCmd->getSYCLMemObj(),
AllocaCmd->getMemAllocation(), Fill->MPattern.size(),
Fill->MPattern.data(), Req->MDims, Req->MMemoryRange, Req->MAccessRange,
Req->MOffset, Req->MElemSize, std::move(MSyncPointDeps), &OutSyncPoint);
MEvent->setSyncPoint(OutSyncPoint);
return PI_SUCCESS;
}
case CG::CGTYPE::FillUSM: {
CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get();
MemoryManager::ext_oneapi_fill_usm_cmd_buffer(
MQueue->getContextImplPtr(), MCommandBuffer, Fill->getDst(),
Fill->getLength(), Fill->getFill(), std::move(MSyncPointDeps),
&OutSyncPoint);
MEvent->setSyncPoint(OutSyncPoint);
return PI_SUCCESS;
}
default:
throw runtime_error("CG type not implemented for command buffers.",
PI_ERROR_INVALID_OPERATION);
Expand Down
Loading

0 comments on commit 8ea0229

Please sign in to comment.