diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index 7c405d5ca791b..fed89532ff7ce 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -37,12 +37,14 @@ with the following entry-points: | `urCommandBufferFinalizeExp` | No more commands can be appended, makes command-buffer ready to enqueue on a command-queue. | | `urCommandBufferAppendKernelLaunchExp` | Append a kernel execution command to command-buffer. | | `urCommandBufferAppendUSMMemcpyExp` | Append a USM memcpy command to the command-buffer. | +| `urCommandBufferAppendUSMFillExp` | Append a USM fill command to the command-buffer. | | `urCommandBufferAppendMemBufferCopyExp` | Append a mem buffer copy command to the command-buffer. | | `urCommandBufferAppendMemBufferWriteExp` | Append a memory write command to a command-buffer object. | | `urCommandBufferAppendMemBufferReadExp` | Append a memory read command to a command-buffer object. | | `urCommandBufferAppendMemBufferCopyRectExp` | Append a rectangular memory copy command to a command-buffer object. | | `urCommandBufferAppendMemBufferWriteRectExp` | Append a rectangular memory write command to a command-buffer object. | | `urCommandBufferAppendMemBufferReadRectExp` | Append a rectangular memory read command to a command-buffer object. | +| `urCommandBufferAppendMemBufferFillExp` | Append a memory fill command to a command-buffer object. | | `urCommandBufferEnqueueExp` | Submit command-buffer to a command-queue for execution. | See the [UR EXP-COMMAND-BUFFER](https://oneapi-src.github.io/unified-runtime/core/EXP-COMMAND-BUFFER.html) @@ -347,6 +349,8 @@ The types of commands which are unsupported, and lead to this exception are: This corresponds to a memory buffer write command. * `handler::copy(src, dest)` or `handler::memcpy(dest, src)` - Where both `src` and `dest` are USM pointers. This corresponds to a USM copy command. +* `handler::memset(ptr, value, numBytes)` - This corresponds to a USM memory + fill command. Note that `handler::copy(src, dest)` where both `src` and `dest` are an accessor is supported, as a memory buffer copy command exists in the OpenCL extension. diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 69513335bf191..d963cfb860f4e 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -176,6 +176,8 @@ _PI_API(piextCommandBufferMemBufferWrite) _PI_API(piextCommandBufferMemBufferWriteRect) _PI_API(piextCommandBufferMemBufferRead) _PI_API(piextCommandBufferMemBufferReadRect) +_PI_API(piextCommandBufferMemBufferFill) +_PI_API(piextCommandBufferFillUSM) _PI_API(piextEnqueueCommandBuffer) _PI_API(piextUSMPitchedAlloc) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 010c59dd3c9d6..9860906e0f847 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -147,9 +147,10 @@ // 14.38 Change PI_MEM_ADVICE_* values to flags for use in bitwise operations. // 14.39 Added PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT device info query. // 14.40 Add HIP _pi_mem_advice alises to match the PI_MEM_ADVICE_CUDA* ones. +// 14.41 Added piextCommandBufferMemBufferFill & piextCommandBufferFillUSM #define _PI_H_VERSION_MAJOR 14 -#define _PI_H_VERSION_MINOR 40 +#define _PI_H_VERSION_MINOR 41 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -2441,7 +2442,7 @@ __SYCL_EXPORT pi_result piextCommandBufferMemBufferReadRect( pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - void *ptr, pi_uint32 num_events_in_wait_list, + void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point); @@ -2458,7 +2459,7 @@ __SYCL_EXPORT pi_result piextCommandBufferMemBufferReadRect( /// \param sync_point The sync_point associated with this memory operation. __SYCL_EXPORT pi_result piextCommandBufferMemBufferWrite( pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, - size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, + size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point); @@ -2483,7 +2484,43 @@ __SYCL_EXPORT pi_result piextCommandBufferMemBufferWriteRect( pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - const void *ptr, pi_uint32 num_events_in_wait_list, + const void *ptr, pi_uint32 num_sync_points_in_wait_list, + const pi_ext_sync_point *sync_point_wait_list, + pi_ext_sync_point *sync_point); + +/// API to append a mem buffer fill command to the command-buffer. +/// \param command_buffer The command-buffer to append onto. +/// \param buffer is the location to fill the data. +/// \param pattern pointer to the pattern to fill the buffer with. +/// \param pattern_size size of the pattern in bytes. +/// \param offset Offset into the buffer to fill from. +/// \param size fill size in bytes. +/// \param num_sync_points_in_wait_list The number of sync points in the +/// provided wait list. +/// \param sync_point_wait_list A list of sync points that this command must +/// wait on. +/// \param sync_point The sync_point associated with this memory operation. +__SYCL_EXPORT pi_result piextCommandBufferMemBufferFill( + pi_ext_command_buffer command_buffer, pi_mem buffer, const void *pattern, + size_t pattern_size, size_t offset, size_t size, + pi_uint32 num_sync_points_in_wait_list, + const pi_ext_sync_point *sync_point_wait_list, + pi_ext_sync_point *sync_point); + +/// API to append a USM fill command to the command-buffer. +/// \param command_buffer The command-buffer to append onto. +/// \param ptr pointer to the USM allocation to fill. +/// \param pattern pointer to the pattern to fill ptr with. +/// \param pattern_size size of the pattern in bytes. +/// \param size fill size in bytes. +/// \param num_sync_points_in_wait_list The number of sync points in the +/// provided wait list. +/// \param sync_point_wait_list A list of sync points that this command must +/// wait on. +/// \param sync_point The sync_point associated with this memory operation. +__SYCL_EXPORT pi_result piextCommandBufferFillUSM( + pi_ext_command_buffer command_buffer, void *ptr, const void *pattern, + size_t pattern_size, size_t size, pi_uint32 num_sync_points_in_wait_list, const pi_ext_sync_point *sync_point_wait_list, pi_ext_sync_point *sync_point); diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 94a39137ec4f7..5eb06f37b2237 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1137,6 +1137,27 @@ pi_result piextCommandBufferMemBufferWriteRect( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } +pi_result piextCommandBufferMemBufferFill( + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, + size_t PatternSize, size_t Offset, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferFill( + CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); +} + +pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer, + void *Ptr, const void *Pattern, + size_t PatternSize, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, + pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferFillUSM( + CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint); +} + pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, pi_queue Queue, pi_uint32 NumEventsInWaitList, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 7095526dc1d34..775183d82d239 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1145,6 +1145,27 @@ pi_result piextCommandBufferMemBufferWriteRect( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } +pi_result piextCommandBufferMemBufferFill( + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, + size_t PatternSize, size_t Offset, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferFill( + CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); +} + +pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer, + void *Ptr, const void *Pattern, + size_t PatternSize, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, + pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferFillUSM( + CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint); +} + pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, pi_queue Queue, pi_uint32 NumEventsInWaitList, diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 4816341bb0f01..b4231ffdaff03 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1304,6 +1304,27 @@ pi_result piextCommandBufferMemBufferWriteRect( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } +pi_result piextCommandBufferMemBufferFill( + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, + size_t PatternSize, size_t Offset, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferFill( + CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); +} + +pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer, + void *Ptr, const void *Pattern, + size_t PatternSize, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, + pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferFillUSM( + CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint); +} + pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, pi_queue Queue, pi_uint32 NumEventsInWaitList, diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 6cc6a325af923..7512d411144ab 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1081,6 +1081,27 @@ pi_result piextCommandBufferMemBufferWriteRect( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } +pi_result piextCommandBufferMemBufferFill( + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, + size_t PatternSize, size_t Offset, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferFill( + CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); +} + +pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer, + void *Ptr, const void *Pattern, + size_t PatternSize, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, + pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferFillUSM( + CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint); +} + pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, pi_queue Queue, pi_uint32 NumEventsInWaitList, diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index d3051c47bd93b..75d1bd598e80a 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -4580,6 +4580,37 @@ inline pi_result piextCommandBufferMemBufferWrite( return PI_SUCCESS; } +inline pi_result piextCommandBufferMemBufferFill( + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, + size_t PatternSize, size_t Offset, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); + + ur_exp_command_buffer_handle_t UrCommandBuffer = + reinterpret_cast(CommandBuffer); + ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); + + HANDLE_ERRORS(urCommandBufferAppendMemBufferFillExp( + UrCommandBuffer, UrBuffer, Pattern, PatternSize, Offset, Size, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); + return PI_SUCCESS; +} + +inline pi_result piextCommandBufferFillUSM( + pi_ext_command_buffer CommandBuffer, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Size, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + + ur_exp_command_buffer_handle_t UrCommandBuffer = + reinterpret_cast(CommandBuffer); + + HANDLE_ERRORS(urCommandBufferAppendUSMFillExp( + UrCommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint)); + return PI_SUCCESS; +} + inline pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, pi_queue Queue, pi_uint32 NumEventsInWaitList, diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index ab5b801c3fda3..7cb9fdbb9b554 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -1102,6 +1102,27 @@ pi_result piextCommandBufferMemBufferWriteRect( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } +pi_result piextCommandBufferMemBufferFill( + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, + size_t PatternSize, size_t Offset, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferFill( + CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); +} + +pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer, + void *Ptr, const void *Pattern, + size_t PatternSize, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, + pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferFillUSM( + CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint); +} + pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, pi_queue Queue, pi_uint32 NumEventsInWaitList, diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index ae357a8f4fe5b..d0071dbabd15a 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -1666,6 +1666,50 @@ void MemoryManager::ext_oneapi_copy_usm_cmd_buffer( } } +void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( + sycl::detail::ContextImplPtr Context, + sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, + size_t Len, int Pattern, std::vector Deps, + sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { + + if (!DstMem) + throw runtime_error("NULL pointer argument in memory fill operation.", + PI_ERROR_INVALID_VALUE); + + const PluginPtr &Plugin = Context->getPlugin(); + // Pattern is interpreted as an unsigned char so pattern size is always 1. + size_t PatternSize = 1; + Plugin->call( + CommandBuffer, DstMem, &Pattern, PatternSize, Len, Deps.size(), + Deps.data(), OutSyncPoint); +} + +void MemoryManager::ext_oneapi_fill_cmd_buffer( + sycl::detail::ContextImplPtr Context, + sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj, + void *Mem, size_t PatternSize, const char *Pattern, unsigned int Dim, + sycl::range<3> Size, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, + unsigned int ElementSize, + std::vector Deps, + sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { + assert(SYCLMemObj && "The SYCLMemObj is nullptr"); + + const PluginPtr &Plugin = Context->getPlugin(); + if (SYCLMemObj->getType() != detail::SYCLMemObjI::MemObjType::Buffer) { + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + "Images are not supported in Graphs"); + } + if (Dim <= 1) { + Plugin->call( + CommandBuffer, pi::cast(Mem), Pattern, + PatternSize, AccessOffset[0] * ElementSize, + AccessRange[0] * ElementSize, Deps.size(), Deps.data(), OutSyncPoint); + return; + } + throw runtime_error("Not supported configuration of fill requested", + PI_ERROR_INVALID_OPERATION); +} + void MemoryManager::copy_image_bindless( void *Src, QueueImplPtr Queue, void *Dst, const sycl::detail::pi::PiMemImageDesc &Desc, diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index a1b68b1418c69..6169c99392f66 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -316,6 +316,24 @@ class __SYCL_EXPORT MemoryManager { void *DstMem, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); + static void ext_oneapi_fill_usm_cmd_buffer( + sycl::detail::ContextImplPtr Context, + sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, + size_t Len, int Pattern, + std::vector Deps, + sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); + + static void + ext_oneapi_fill_cmd_buffer(sycl::detail::ContextImplPtr Context, + sycl::detail::pi::PiExtCommandBuffer CommandBuffer, + SYCLMemObjI *SYCLMemObj, void *Mem, + size_t PatternSize, const char *Pattern, + unsigned int Dim, sycl::range<3> Size, + sycl::range<3> AccessRange, + sycl::id<3> AccessOffset, unsigned int ElementSize, + std::vector Deps, + sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); + static void copy_image_bindless(void *Src, QueueImplPtr Queue, void *Dst, const sycl::detail::pi::PiMemImageDesc &Desc, diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index db14a10943ce3..2ffc0ebd54a38 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2764,6 +2764,28 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } + case CG::CGTYPE::Fill: { + CGFill *Fill = (CGFill *)MCommandGroup.get(); + Requirement *Req = (Requirement *)(Fill->getReqToFill()); + AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); + + MemoryManager::ext_oneapi_fill_cmd_buffer( + MQueue->getContextImplPtr(), MCommandBuffer, AllocaCmd->getSYCLMemObj(), + AllocaCmd->getMemAllocation(), Fill->MPattern.size(), + Fill->MPattern.data(), Req->MDims, Req->MMemoryRange, Req->MAccessRange, + Req->MOffset, Req->MElemSize, std::move(MSyncPointDeps), &OutSyncPoint); + MEvent->setSyncPoint(OutSyncPoint); + return PI_SUCCESS; + } + case CG::CGTYPE::FillUSM: { + CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); + MemoryManager::ext_oneapi_fill_usm_cmd_buffer( + MQueue->getContextImplPtr(), MCommandBuffer, Fill->getDst(), + Fill->getLength(), Fill->getFill(), std::move(MSyncPointDeps), + &OutSyncPoint); + MEvent->setSyncPoint(OutSyncPoint); + return PI_SUCCESS; + } default: throw runtime_error("CG type not implemented for command buffers.", PI_ERROR_INVALID_OPERATION); diff --git a/sycl/test-e2e/Graph/Explicit/buffer_fill.cpp b/sycl/test-e2e/Graph/Explicit/buffer_fill.cpp new file mode 100644 index 0000000000000..73b961994a72b --- /dev/null +++ b/sycl/test-e2e/Graph/Explicit/buffer_fill.cpp @@ -0,0 +1,11 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --implicit-check-not=LEAK %s %} +// +// TODO enable cuda once buffer issue investigated and fixed +// UNSUPPORTED: cuda + +#define GRAPH_E2E_EXPLICIT + +#include "../Inputs/buffer_fill.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/usm_memset.cpp b/sycl/test-e2e/Graph/Explicit/usm_memset.cpp new file mode 100644 index 0000000000000..a8a42abc1acd0 --- /dev/null +++ b/sycl/test-e2e/Graph/Explicit/usm_memset.cpp @@ -0,0 +1,11 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --implicit-check-not=LEAK %s %} + +// USM memset command not supported for OpenCL +// UNSUPPORTED: opencl + +#define GRAPH_E2E_EXPLICIT + +#include "../Inputs/usm_memset.cpp" diff --git a/sycl/test-e2e/Graph/Inputs/buffer_fill.cpp b/sycl/test-e2e/Graph/Inputs/buffer_fill.cpp new file mode 100644 index 0000000000000..351194dadda0f --- /dev/null +++ b/sycl/test-e2e/Graph/Inputs/buffer_fill.cpp @@ -0,0 +1,88 @@ +// Tests adding a Buffer fill operation as a graph node. + +#include "../graph_common.hpp" + +int main() { + + queue Queue{{sycl::ext::intel::property::queue::no_immediate_command_list{}}}; + + if (!are_graphs_supported(Queue)) { + return 0; + } + + const size_t N = 10; + const float Pattern = 3.14f; + std::vector Data(N); + buffer Buffer{Data}; + + const uint64_t PatternI64 = 0x3333333355555555; + std::vector DataI64(N); + buffer BufferI64{DataI64}; + + const uint32_t PatternI32 = 888; + std::vector DataI32(N); + buffer BufferI32{DataI32}; + + const uint16_t PatternI16 = 777; + std::vector DataI16(N); + buffer BufferI16{DataI16}; + + const uint8_t PatternI8 = 33; + std::vector DataI8(N); + buffer BufferI8{DataI8}; + + Buffer.set_write_back(false); + BufferI64.set_write_back(false); + BufferI32.set_write_back(false); + BufferI16.set_write_back(false); + BufferI8.set_write_back(false); + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + add_node(Graph, Queue, [&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + CGH.fill(Acc, Pattern); + }); + + add_node(Graph, Queue, [&](handler &CGH) { + auto Acc = BufferI64.get_access(CGH); + CGH.fill(Acc, PatternI64); + }); + + add_node(Graph, Queue, [&](handler &CGH) { + auto Acc = BufferI32.get_access(CGH); + CGH.fill(Acc, PatternI32); + }); + + add_node(Graph, Queue, [&](handler &CGH) { + auto Acc = BufferI16.get_access(CGH); + CGH.fill(Acc, PatternI16); + }); + + add_node(Graph, Queue, [&](handler &CGH) { + auto Acc = BufferI8.get_access(CGH); + CGH.fill(Acc, PatternI8); + }); + + auto ExecGraph = Graph.finalize(); + + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }).wait(); + } + host_accessor HostData(Buffer); + host_accessor HostDataI64(BufferI64); + host_accessor HostDataI32(BufferI32); + host_accessor HostDataI16(BufferI16); + host_accessor HostDataI8(BufferI8); + for (int i = 0; i < N; i++) { + assert(HostData[i] == Pattern); + assert(HostDataI64[i] == PatternI64); + assert(HostDataI32[i] == PatternI32); + assert(HostDataI16[i] == PatternI16); + assert(HostDataI8[i] == PatternI8); + } + + return 0; +} diff --git a/sycl/test-e2e/Graph/Inputs/usm_memset.cpp b/sycl/test-e2e/Graph/Inputs/usm_memset.cpp new file mode 100644 index 0000000000000..f357b9b3a5adf --- /dev/null +++ b/sycl/test-e2e/Graph/Inputs/usm_memset.cpp @@ -0,0 +1,34 @@ +// Tests adding a USM memset operation as a graph node. + +#include "../graph_common.hpp" + +int main() { + + queue Queue{{sycl::ext::intel::property::queue::no_immediate_command_list{}}}; + + if (!are_graphs_supported(Queue)) { + return 0; + } + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + const size_t N = 10; + unsigned char *Arr = malloc_device(N, Queue); + + int Value = 77; + auto NodeA = + add_node(Graph, Queue, [&](handler &CGH) { CGH.memset(Arr, Value, N); }); + + auto ExecGraph = Graph.finalize(); + + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }).wait(); + + std::vector Output(N); + Queue.memcpy(Output.data(), Arr, N).wait(); + for (int i = 0; i < N; i++) + assert(Output[i] == Value); + + sycl::free(Arr, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/RecordReplay/buffer_fill.cpp b/sycl/test-e2e/Graph/RecordReplay/buffer_fill.cpp new file mode 100644 index 0000000000000..91729ace49742 --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/buffer_fill.cpp @@ -0,0 +1,11 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --implicit-check-not=LEAK %s %} +// +// TODO enable cuda once buffer issue investigated and fixed +// UNSUPPORTED: cuda + +#define GRAPH_E2E_RECORD_REPLAY + +#include "../Inputs/buffer_fill.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_memset.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_memset.cpp new file mode 100644 index 0000000000000..acbb0a502c67f --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/usm_memset.cpp @@ -0,0 +1,11 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --implicit-check-not=LEAK %s %} + +// USM memset command not supported for OpenCL +// UNSUPPORTED: opencl + +#define GRAPH_E2E_RECORD_REPLAY + +#include "../Inputs/usm_memset.cpp" diff --git a/sycl/test/abi/pi_cuda_symbol_check.dump b/sycl/test/abi/pi_cuda_symbol_check.dump index abf73cce97bd0..fcdf008702292 100644 --- a/sycl/test/abi/pi_cuda_symbol_check.dump +++ b/sycl/test/abi/pi_cuda_symbol_check.dump @@ -84,9 +84,11 @@ piSamplerRetain piTearDown piextBindlessImageSamplerCreate piextCommandBufferCreate +piextCommandBufferFillUSM piextCommandBufferFinalize piextCommandBufferMemBufferCopy piextCommandBufferMemBufferCopyRect +piextCommandBufferMemBufferFill piextCommandBufferMemBufferRead piextCommandBufferMemBufferReadRect piextCommandBufferMemBufferWrite diff --git a/sycl/test/abi/pi_hip_symbol_check.dump b/sycl/test/abi/pi_hip_symbol_check.dump index 3940b6d80677a..c0dacf2632e9b 100644 --- a/sycl/test/abi/pi_hip_symbol_check.dump +++ b/sycl/test/abi/pi_hip_symbol_check.dump @@ -84,9 +84,11 @@ piSamplerRetain piTearDown piextBindlessImageSamplerCreate piextCommandBufferCreate +piextCommandBufferFillUSM piextCommandBufferFinalize piextCommandBufferMemBufferCopy piextCommandBufferMemBufferCopyRect +piextCommandBufferMemBufferFill piextCommandBufferMemBufferRead piextCommandBufferMemBufferReadRect piextCommandBufferMemBufferWrite diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index 38b3a420b2e71..a2bd23cbf26ce 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -83,9 +83,11 @@ piSamplerRetain piTearDown piextBindlessImageSamplerCreate piextCommandBufferCreate +piextCommandBufferFillUSM piextCommandBufferFinalize piextCommandBufferMemBufferCopy piextCommandBufferMemBufferCopyRect +piextCommandBufferMemBufferFill piextCommandBufferMemBufferRead piextCommandBufferMemBufferReadRect piextCommandBufferMemBufferWrite diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index 11ee74902849b..8bece2c54db32 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -83,9 +83,11 @@ piSamplerRetain piTearDown piextBindlessImageSamplerCreate piextCommandBufferCreate +piextCommandBufferFillUSM piextCommandBufferFinalize piextCommandBufferMemBufferCopy piextCommandBufferMemBufferCopyRect +piextCommandBufferMemBufferFill piextCommandBufferMemBufferRead piextCommandBufferMemBufferReadRect piextCommandBufferMemBufferWrite diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 62336dad2ed28..6a3bbbab13a5e 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3926,10 +3926,12 @@ _ZN4sycl3_V16detail13MemoryManager21copy_to_device_globalEPKvbSt10shared_ptrINS1 _ZN4sycl3_V16detail13MemoryManager23copy_from_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmPvRKSt6vectorIP9_pi_eventSaISB_EEPSB_ _ZN4sycl3_V16detail13MemoryManager23copy_from_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmPvRKSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE _ZN4sycl3_V16detail13MemoryManager24allocateInteropMemObjectESt10shared_ptrINS1_12context_implEEPvRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event +_ZN4sycl3_V16detail13MemoryManager26ext_oneapi_fill_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvmPKcjNS0_5rangeILi3EEESE_NS0_2idILi3EEEjSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2D_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSA_jSC_SC_SE_jSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2H_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjPcjSC_SE_jSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyH2D_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPcjNS0_5rangeILi3EEENS0_2idILi3EEEjPvjSC_SC_SE_jSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager30ext_oneapi_copy_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEPKvP22_pi_ext_command_buffermPvSt6vectorIjSaIjEEPj +_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmiSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager3mapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEENS0_6access4modeEjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ _ZN4sycl3_V16detail13MemoryManager4copyEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEjNS0_5rangeILi3EEESA_NS0_2idILi3EEEjS5_S8_jSA_SA_SC_jSt6vectorIP9_pi_eventSaISF_EERSF_ _ZN4sycl3_V16detail13MemoryManager4copyEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEjNS0_5rangeILi3EEESA_NS0_2idILi3EEEjS5_S8_jSA_SA_SC_jSt6vectorIP9_pi_eventSaISF_EERSF_RKS6_INS1_10event_implEE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index c3e000b4f9553..f20eb9cede900 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -1041,6 +1041,8 @@ ?ext_oneapi_empty@queue@_V1@sycl@@QEBA_NXZ ?ext_oneapi_enable_peer_access@device@_V1@sycl@@QEAAXAEBV123@@Z ?ext_oneapi_fill2d_impl@handler@_V1@sycl@@AEAAXPEAX_KPEBX111@Z +?ext_oneapi_fill_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEAX_KPEBDIV?$range@$02@34@6V?$id@$02@34@IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z +?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KHV?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_get_default_context@platform@_V1@sycl@@QEBA?AVcontext@23@XZ ?ext_oneapi_get_kernel@kernel_bundle_plain@detail@_V1@sycl@@QEAA?AVkernel@34@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?ext_oneapi_get_state@queue@_V1@sycl@@QEBA?AW4queue_state@experimental@oneapi@ext@23@XZ diff --git a/sycl/unittests/Extensions/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph.cpp index 8ad08257a1046..a9d5dd4f63081 100644 --- a/sycl/unittests/Extensions/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph.cpp @@ -2178,3 +2178,65 @@ TEST_F(MultiThreadGraphTest, Finalize) { ASSERT_EQ(checkExecGraphSchedule(GraphExecImpl, GraphExecRefImpl), true); } } + +// Test adding fill and memset nodes to a graph +TEST_F(CommandGraphTest, FillMemsetNodes) { + const int Value = 7; + // Buffer fill + buffer Buffer{range<1>{1}}; + Buffer.set_write_back(false); + + { + ext::oneapi::experimental::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {experimental::property::graph::assume_buffer_outlives_graph{}}}; + + auto NodeA = Graph.add([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + CGH.fill(Acc, Value); + }); + auto NodeB = Graph.add([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + CGH.fill(Acc, Value); + }); + + auto NodeAImpl = sycl::detail::getSyclObjImpl(NodeA); + auto NodeBImpl = sycl::detail::getSyclObjImpl(NodeB); + + // Check Operator== + EXPECT_EQ(NodeAImpl, NodeAImpl); + EXPECT_NE(NodeAImpl, NodeBImpl); + } + + // USM + { + int *USMPtr = malloc_device(1, Queue); + + // We need to create some differences between nodes because unlike buffer + // fills they are not differentiated on accessor ptr value. + auto FillNodeA = + Graph.add([&](handler &CGH) { CGH.fill(USMPtr, Value, 1); }); + auto FillNodeB = + Graph.add([&](handler &CGH) { CGH.fill(USMPtr, Value + 1, 1); }); + auto MemsetNodeA = + Graph.add([&](handler &CGH) { CGH.memset(USMPtr, Value, 1); }); + auto MemsetNodeB = + Graph.add([&](handler &CGH) { CGH.memset(USMPtr, Value, 2); }); + + auto FillNodeAImpl = sycl::detail::getSyclObjImpl(FillNodeA); + auto FillNodeBImpl = sycl::detail::getSyclObjImpl(FillNodeB); + auto MemsetNodeAImpl = sycl::detail::getSyclObjImpl(MemsetNodeA); + auto MemsetNodeBImpl = sycl::detail::getSyclObjImpl(MemsetNodeB); + + // Check Operator== + EXPECT_EQ(FillNodeAImpl, FillNodeAImpl); + EXPECT_EQ(FillNodeBImpl, FillNodeBImpl); + EXPECT_NE(FillNodeAImpl, FillNodeBImpl); + + EXPECT_EQ(MemsetNodeAImpl, MemsetNodeAImpl); + EXPECT_EQ(MemsetNodeBImpl, MemsetNodeBImpl); + EXPECT_NE(MemsetNodeAImpl, MemsetNodeBImpl); + sycl::free(USMPtr, Queue); + } +} diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index f12917b1e70b9..decc1a7e309ee 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1376,6 +1376,23 @@ inline pi_result mock_piextCommandBufferMemBufferCopyRect( return PI_SUCCESS; } +inline pi_result mock_piextCommandBufferMemBufferFill( + pi_ext_command_buffer command_buffer, pi_mem buffer, const void *pattern, + size_t pattern_size, size_t offset, size_t size, + pi_uint32 num_sync_points_in_wait_list, + const pi_ext_sync_point *sync_point_wait_list, + pi_ext_sync_point *sync_point) { + return PI_SUCCESS; +} + +inline pi_result mock_piextCommandBufferFillUSM( + pi_ext_command_buffer command_buffer, void *ptr, const void *pattern, + size_t pattern_size, size_t size, pi_uint32 num_sync_points_in_wait_list, + const pi_ext_sync_point *sync_point_wait_list, + pi_ext_sync_point *sync_point) { + return PI_SUCCESS; +} + inline pi_result mock_piTearDown(void *PluginParameter) { return PI_SUCCESS; } inline pi_result mock_piPluginGetLastError(char **message) {