Skip to content

Commit 8af054f

Browse files
Moved changes from UR PR
1 parent 3730930 commit 8af054f

File tree

3 files changed

+143
-74
lines changed

3 files changed

+143
-74
lines changed

sycl/test-e2e/Graph/ValidUsage/linear_graph_copy.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
//
88
// Extra runs to test in-order command lists path
99
// RUN: %if level_zero %{env UR_L0_USE_DRIVER_INORDER_LISTS=1 UR_L0_CMD_BUFFER_USE_IMMEDIATE_APPEND_PATH=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
10-
// RUN: %if level_zero %{env UR_L0_USE_DRIVER_INORDER_LISTS=1 UR_L0_CMD_BUFFER_USE_IMMEDIATE_APPEND_PATH=1 UR_L0_USE_DRIVER_COUNTER_BASED_EVENTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
10+
// RUN: %if level_zero %{env UR_L0_USE_DRIVER_INORDER_LISTS=1 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 UR_L0_CMD_BUFFER_USE_IMMEDIATE_APPEND_PATH=1 UR_L0_USE_DRIVER_COUNTER_BASED_EVENTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
1111
//
1212
// Tests that the optimization to use the L0 Copy Engine for memory commands
1313
// does not interfere with the linear graph optimization

unified-runtime/source/adapters/level_zero/command_buffer.cpp

+139-73
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "command_buffer.hpp"
1111
#include "helpers/kernel_helpers.hpp"
1212
#include "logger/ur_logger.hpp"
13+
#include "ur_api.h"
1314
#include "ur_interface_loader.hpp"
1415
#include "ur_level_zero.hpp"
1516

@@ -170,6 +171,65 @@ ur_result_t getEventsFromSyncPoints(
170171
return UR_RESULT_SUCCESS;
171172
}
172173

174+
/**
175+
* If necessary, it creates a signal event and appends it to the previous
176+
* command list (copy or compute), to indicate when it's finished executing.
177+
* @param[in] CommandBuffer The CommandBuffer where the command is appended.
178+
* @param[in] ZeCommandList the CommandList that's currently in use.
179+
* @param[out] WaitEventList The list of event for the future command list to
180+
* wait on before execution.
181+
* @return UR_RESULT_SUCCESS or an error code on failure
182+
*/
183+
ur_result_t createSyncPointBetweenCopyAndCompute(
184+
ur_exp_command_buffer_handle_t CommandBuffer,
185+
ze_command_list_handle_t ZeCommandList,
186+
std::vector<ze_event_handle_t> &WaitEventList) {
187+
188+
if (!CommandBuffer->ZeCopyCommandList) {
189+
return UR_RESULT_SUCCESS;
190+
}
191+
192+
bool IsCopy{ZeCommandList == CommandBuffer->ZeCopyCommandList};
193+
194+
// Skip synchronization for the first node in a graph or if the current
195+
// command list matches the previous one.
196+
if (!CommandBuffer->MWasPrevCopyCommandList.has_value()) {
197+
CommandBuffer->MWasPrevCopyCommandList = IsCopy;
198+
return UR_RESULT_SUCCESS;
199+
} else if (IsCopy == CommandBuffer->MWasPrevCopyCommandList) {
200+
return UR_RESULT_SUCCESS;
201+
}
202+
203+
/*
204+
* If the current CommandList differs from the previously used one, we must
205+
* append a signal event to the previous CommandList to track when
206+
* its execution is complete.
207+
*/
208+
ur_event_handle_t SignalPrevCommandEvent = nullptr;
209+
UR_CALL(EventCreate(CommandBuffer->Context, nullptr /*Queue*/,
210+
false /*IsMultiDevice*/, false, &SignalPrevCommandEvent,
211+
false /*CounterBasedEventEnabled*/,
212+
!CommandBuffer->IsProfilingEnabled,
213+
false /*InterruptBasedEventEnabled*/));
214+
215+
// Determine which command list to signal.
216+
auto CommandListToSignal = (!IsCopy && CommandBuffer->MWasPrevCopyCommandList)
217+
? CommandBuffer->ZeCopyCommandList
218+
: CommandBuffer->ZeComputeCommandList;
219+
CommandBuffer->MWasPrevCopyCommandList = IsCopy;
220+
221+
ZE2UR_CALL(zeCommandListAppendSignalEvent,
222+
(CommandListToSignal, SignalPrevCommandEvent->ZeEvent));
223+
224+
// Add the event to the dependencies for future command list to wait on.
225+
WaitEventList.push_back(SignalPrevCommandEvent->ZeEvent);
226+
227+
// Mark the event for future reset.
228+
CommandBuffer->ZeEventsList.push_back(SignalPrevCommandEvent->ZeEvent);
229+
230+
return UR_RESULT_SUCCESS;
231+
}
232+
173233
/**
174234
* If needed, creates a sync point for a given command and returns the L0
175235
* events associated with the sync point.
@@ -190,7 +250,7 @@ ur_result_t getEventsFromSyncPoints(
190250
*/
191251
ur_result_t createSyncPointAndGetZeEvents(
192252
ur_command_t CommandType, ur_exp_command_buffer_handle_t CommandBuffer,
193-
uint32_t NumSyncPointsInWaitList,
253+
ze_command_list_handle_t ZeCommandList, uint32_t NumSyncPointsInWaitList,
194254
const ur_exp_command_buffer_sync_point_t *SyncPointWaitList,
195255
bool HostVisible, ur_exp_command_buffer_sync_point_t *RetSyncPoint,
196256
std::vector<ze_event_handle_t> &ZeEventList,
@@ -199,6 +259,11 @@ ur_result_t createSyncPointAndGetZeEvents(
199259
ZeLaunchEvent = nullptr;
200260

201261
if (CommandBuffer->IsInOrderCmdList) {
262+
UR_CALL(createSyncPointBetweenCopyAndCompute(CommandBuffer, ZeCommandList,
263+
ZeEventList));
264+
if (!ZeEventList.empty()) {
265+
NumSyncPointsInWaitList = ZeEventList.size();
266+
}
202267
return UR_RESULT_SUCCESS;
203268
}
204269

@@ -225,24 +290,24 @@ ur_result_t createSyncPointAndGetZeEvents(
225290
return UR_RESULT_SUCCESS;
226291
}
227292

228-
// Shared by all memory read/write/copy PI interfaces.
229-
// Helper function for common code when enqueuing memory operations to a command
230-
// buffer.
293+
// Shared by all memory read/write/copy UR interfaces.
294+
// Helper function for common code when enqueuing memory operations to a
295+
// command buffer.
231296
ur_result_t enqueueCommandBufferMemCopyHelper(
232297
ur_command_t CommandType, ur_exp_command_buffer_handle_t CommandBuffer,
233298
void *Dst, const void *Src, size_t Size, bool PreferCopyEngine,
234299
uint32_t NumSyncPointsInWaitList,
235300
const ur_exp_command_buffer_sync_point_t *SyncPointWaitList,
236301
ur_exp_command_buffer_sync_point_t *RetSyncPoint) {
237302

303+
ze_command_list_handle_t ZeCommandList =
304+
CommandBuffer->chooseCommandList(PreferCopyEngine);
305+
238306
std::vector<ze_event_handle_t> ZeEventList;
239307
ze_event_handle_t ZeLaunchEvent = nullptr;
240308
UR_CALL(createSyncPointAndGetZeEvents(
241-
CommandType, CommandBuffer, NumSyncPointsInWaitList, SyncPointWaitList,
242-
false, RetSyncPoint, ZeEventList, ZeLaunchEvent));
243-
244-
ze_command_list_handle_t ZeCommandList =
245-
CommandBuffer->chooseCommandList(PreferCopyEngine);
309+
CommandType, CommandBuffer, ZeCommandList, NumSyncPointsInWaitList,
310+
SyncPointWaitList, false, RetSyncPoint, ZeEventList, ZeLaunchEvent));
246311

247312
ZE2UR_CALL(zeCommandListAppendMemoryCopy,
248313
(ZeCommandList, Dst, Src, Size, ZeLaunchEvent, ZeEventList.size(),
@@ -293,14 +358,14 @@ ur_result_t enqueueCommandBufferMemCopyRectHelper(
293358
const ze_copy_region_t ZeDstRegion = {DstOriginX, DstOriginY, DstOriginZ,
294359
Width, Height, Depth};
295360

361+
ze_command_list_handle_t ZeCommandList =
362+
CommandBuffer->chooseCommandList(PreferCopyEngine);
363+
296364
std::vector<ze_event_handle_t> ZeEventList;
297365
ze_event_handle_t ZeLaunchEvent = nullptr;
298366
UR_CALL(createSyncPointAndGetZeEvents(
299-
CommandType, CommandBuffer, NumSyncPointsInWaitList, SyncPointWaitList,
300-
false, RetSyncPoint, ZeEventList, ZeLaunchEvent));
301-
302-
ze_command_list_handle_t ZeCommandList =
303-
CommandBuffer->chooseCommandList(PreferCopyEngine);
367+
CommandType, CommandBuffer, ZeCommandList, NumSyncPointsInWaitList,
368+
SyncPointWaitList, false, RetSyncPoint, ZeEventList, ZeLaunchEvent));
304369

305370
ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion,
306371
(ZeCommandList, Dst, &ZeDstRegion, DstPitch, DstSlicePitch, Src,
@@ -321,19 +386,19 @@ ur_result_t enqueueCommandBufferFillHelper(
321386
UR_ASSERT((PatternSize > 0) && ((PatternSize & (PatternSize - 1)) == 0),
322387
UR_RESULT_ERROR_INVALID_VALUE);
323388

324-
std::vector<ze_event_handle_t> ZeEventList;
325-
ze_event_handle_t ZeLaunchEvent = nullptr;
326-
UR_CALL(createSyncPointAndGetZeEvents(
327-
CommandType, CommandBuffer, NumSyncPointsInWaitList, SyncPointWaitList,
328-
true, RetSyncPoint, ZeEventList, ZeLaunchEvent));
329-
330389
bool PreferCopyEngine;
331390
UR_CALL(
332391
preferCopyEngineForFill(CommandBuffer, PatternSize, PreferCopyEngine));
333392

334393
ze_command_list_handle_t ZeCommandList =
335394
CommandBuffer->chooseCommandList(PreferCopyEngine);
336395

396+
std::vector<ze_event_handle_t> ZeEventList;
397+
ze_event_handle_t ZeLaunchEvent = nullptr;
398+
UR_CALL(createSyncPointAndGetZeEvents(
399+
CommandType, CommandBuffer, ZeCommandList, NumSyncPointsInWaitList,
400+
SyncPointWaitList, true, RetSyncPoint, ZeEventList, ZeLaunchEvent));
401+
337402
ZE2UR_CALL(zeCommandListAppendMemoryFill,
338403
(ZeCommandList, Ptr, Pattern, PatternSize, Size, ZeLaunchEvent,
339404
ZeEventList.size(), getPointerFromVector(ZeEventList)));
@@ -477,12 +542,12 @@ void ur_exp_command_buffer_handle_t_::registerSyncPoint(
477542

478543
ze_command_list_handle_t
479544
ur_exp_command_buffer_handle_t_::chooseCommandList(bool PreferCopyEngine) {
480-
if (PreferCopyEngine && this->useCopyEngine() && !this->IsInOrderCmdList) {
545+
if (PreferCopyEngine && useCopyEngine() && !IsInOrderCmdList) {
481546
// We indicate that ZeCopyCommandList contains commands to be submitted.
482-
this->MCopyCommandListEmpty = false;
483-
return this->ZeCopyCommandList;
547+
MCopyCommandListEmpty = false;
548+
return ZeCopyCommandList;
484549
}
485-
return this->ZeComputeCommandList;
550+
return ZeComputeCommandList;
486551
}
487552

488553
ur_result_t ur_exp_command_buffer_handle_t_::getFenceForQueue(
@@ -646,7 +711,7 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
646711
// the current implementation only uses the main copy engine and does not use
647712
// the link engine even if available.
648713
if (Device->hasMainCopyEngine()) {
649-
UR_CALL(createMainCommandList(Context, Device, false, false, true,
714+
UR_CALL(createMainCommandList(Context, Device, IsInOrder, false, true,
650715
ZeCopyCommandList));
651716
}
652717

@@ -812,18 +877,25 @@ finalizeWaitEventPath(ur_exp_command_buffer_handle_t CommandBuffer) {
812877
(CommandBuffer->ZeCommandListResetEvents,
813878
CommandBuffer->ExecutionFinishedEvent->ZeEvent));
814879

815-
if (CommandBuffer->IsInOrderCmdList) {
816-
ZE2UR_CALL(zeCommandListAppendSignalEvent,
817-
(CommandBuffer->ZeComputeCommandList,
818-
CommandBuffer->ExecutionFinishedEvent->ZeEvent));
819-
} else {
820-
// Reset the L0 events we use for command-buffer sync-points to the
821-
// non-signaled state. This is required for multiple submissions.
880+
// Reset the L0 events we use for command-buffer sync-points to the
881+
// non-signaled state. This is required for multiple submissions.
882+
auto resetEvents = [&CommandBuffer]() -> ur_result_t {
822883
for (auto &Event : CommandBuffer->ZeEventsList) {
823884
ZE2UR_CALL(zeCommandListAppendEventReset,
824885
(CommandBuffer->ZeCommandListResetEvents, Event));
825886
}
887+
return UR_RESULT_SUCCESS;
888+
};
826889

890+
if (CommandBuffer->IsInOrderCmdList) {
891+
if (!CommandBuffer->MCopyCommandListEmpty) {
892+
resetEvents();
893+
}
894+
ZE2UR_CALL(zeCommandListAppendSignalEvent,
895+
(CommandBuffer->ZeComputeCommandList,
896+
CommandBuffer->ExecutionFinishedEvent->ZeEvent));
897+
} else {
898+
resetEvents();
827899
// Wait for all the user added commands to complete, and signal the
828900
// command-buffer signal-event when they are done.
829901
ZE2UR_CALL(zeCommandListAppendBarrier,
@@ -1073,7 +1145,8 @@ ur_result_t urCommandBufferAppendKernelLaunchExp(
10731145
std::vector<ze_event_handle_t> ZeEventList;
10741146
ze_event_handle_t ZeLaunchEvent = nullptr;
10751147
UR_CALL(createSyncPointAndGetZeEvents(
1076-
UR_COMMAND_KERNEL_LAUNCH, CommandBuffer, NumSyncPointsInWaitList,
1148+
UR_COMMAND_KERNEL_LAUNCH, CommandBuffer,
1149+
CommandBuffer->ZeComputeCommandList, NumSyncPointsInWaitList,
10771150
SyncPointWaitList, false, RetSyncPoint, ZeEventList, ZeLaunchEvent));
10781151

10791152
ZE2UR_CALL(zeCommandListAppendLaunchKernel,
@@ -1306,29 +1379,25 @@ ur_result_t urCommandBufferAppendUSMPrefetchExp(
13061379
std::ignore = Command;
13071380
std::ignore = Flags;
13081381

1309-
if (CommandBuffer->IsInOrderCmdList) {
1310-
// Add the prefetch command to the command-buffer.
1311-
// Note that L0 does not handle migration flags.
1312-
ZE2UR_CALL(zeCommandListAppendMemoryPrefetch,
1313-
(CommandBuffer->ZeComputeCommandList, Mem, Size));
1314-
} else {
1315-
std::vector<ze_event_handle_t> ZeEventList;
1316-
ze_event_handle_t ZeLaunchEvent = nullptr;
1317-
UR_CALL(createSyncPointAndGetZeEvents(
1318-
UR_COMMAND_USM_PREFETCH, CommandBuffer, NumSyncPointsInWaitList,
1319-
SyncPointWaitList, true, RetSyncPoint, ZeEventList, ZeLaunchEvent));
1320-
1321-
if (NumSyncPointsInWaitList) {
1322-
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
1323-
(CommandBuffer->ZeComputeCommandList, NumSyncPointsInWaitList,
1324-
ZeEventList.data()));
1325-
}
1382+
std::vector<ze_event_handle_t> ZeEventList;
1383+
ze_event_handle_t ZeLaunchEvent = nullptr;
1384+
UR_CALL(createSyncPointAndGetZeEvents(
1385+
UR_COMMAND_USM_PREFETCH, CommandBuffer,
1386+
CommandBuffer->ZeComputeCommandList, NumSyncPointsInWaitList,
1387+
SyncPointWaitList, true, RetSyncPoint, ZeEventList, ZeLaunchEvent));
1388+
1389+
if (NumSyncPointsInWaitList) {
1390+
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
1391+
(CommandBuffer->ZeComputeCommandList, NumSyncPointsInWaitList,
1392+
ZeEventList.data()));
1393+
}
13261394

1327-
// Add the prefetch command to the command-buffer.
1328-
// Note that L0 does not handle migration flags.
1329-
ZE2UR_CALL(zeCommandListAppendMemoryPrefetch,
1330-
(CommandBuffer->ZeComputeCommandList, Mem, Size));
1395+
// Add the prefetch command to the command-buffer.
1396+
// Note that L0 does not handle migration flags.
1397+
ZE2UR_CALL(zeCommandListAppendMemoryPrefetch,
1398+
(CommandBuffer->ZeComputeCommandList, Mem, Size));
13311399

1400+
if (!CommandBuffer->IsInOrderCmdList) {
13321401
// Level Zero does not have a completion "event" with the prefetch API,
13331402
// so manually add command to signal our event.
13341403
ZE2UR_CALL(zeCommandListAppendSignalEvent,
@@ -1376,27 +1445,24 @@ ur_result_t urCommandBufferAppendUSMAdviseExp(
13761445

13771446
ze_memory_advice_t ZeAdvice = static_cast<ze_memory_advice_t>(Value);
13781447

1379-
if (CommandBuffer->IsInOrderCmdList) {
1380-
ZE2UR_CALL(zeCommandListAppendMemAdvise,
1381-
(CommandBuffer->ZeComputeCommandList,
1382-
CommandBuffer->Device->ZeDevice, Mem, Size, ZeAdvice));
1383-
} else {
1384-
std::vector<ze_event_handle_t> ZeEventList;
1385-
ze_event_handle_t ZeLaunchEvent = nullptr;
1386-
UR_CALL(createSyncPointAndGetZeEvents(
1387-
UR_COMMAND_USM_ADVISE, CommandBuffer, NumSyncPointsInWaitList,
1388-
SyncPointWaitList, true, RetSyncPoint, ZeEventList, ZeLaunchEvent));
1389-
1390-
if (NumSyncPointsInWaitList) {
1391-
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
1392-
(CommandBuffer->ZeComputeCommandList, NumSyncPointsInWaitList,
1393-
ZeEventList.data()));
1394-
}
1448+
std::vector<ze_event_handle_t> ZeEventList;
1449+
ze_event_handle_t ZeLaunchEvent = nullptr;
1450+
UR_CALL(createSyncPointAndGetZeEvents(
1451+
UR_COMMAND_USM_ADVISE, CommandBuffer, CommandBuffer->ZeComputeCommandList,
1452+
NumSyncPointsInWaitList, SyncPointWaitList, true, RetSyncPoint,
1453+
ZeEventList, ZeLaunchEvent));
13951454

1396-
ZE2UR_CALL(zeCommandListAppendMemAdvise,
1397-
(CommandBuffer->ZeComputeCommandList,
1398-
CommandBuffer->Device->ZeDevice, Mem, Size, ZeAdvice));
1455+
if (NumSyncPointsInWaitList) {
1456+
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
1457+
(CommandBuffer->ZeComputeCommandList, NumSyncPointsInWaitList,
1458+
ZeEventList.data()));
1459+
}
1460+
1461+
ZE2UR_CALL(zeCommandListAppendMemAdvise,
1462+
(CommandBuffer->ZeComputeCommandList,
1463+
CommandBuffer->Device->ZeDevice, Mem, Size, ZeAdvice));
13991464

1465+
if (!CommandBuffer->IsInOrderCmdList) {
14001466
// Level Zero does not have a completion "event" with the advise API,
14011467
// so manually add command to signal our event.
14021468
ZE2UR_CALL(zeCommandListAppendSignalEvent,

unified-runtime/source/adapters/level_zero/command_buffer.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
//===----------------------------------------------------------------------===//
1010
#pragma once
1111

12+
#include <optional>
1213
#include <ur/ur.hpp>
1314
#include <ur_api.h>
1415
#include <ze_api.h>
@@ -110,6 +111,8 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
110111
// This flag must be set to false if at least one copy command has been
111112
// added to `ZeCopyCommandList`
112113
bool MCopyCommandListEmpty = true;
114+
// This flag tracks if the previous node submission was of a copy type.
115+
std::optional<bool> MWasPrevCopyCommandList;
113116
// [WaitEvent Path only] Level Zero fences for each queue the command-buffer
114117
// has been enqueued to. These should be destroyed when the command-buffer is
115118
// released.

0 commit comments

Comments
 (0)