Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][Graphs] Error checking improvements and fixes for SYCL-Graphs #10628

Closed
wants to merge 8 commits into from
25 changes: 25 additions & 0 deletions sycl/include/sycl/accessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,7 @@ class __SYCL_EXPORT AccessorBaseHost {
const range<3> &getMemoryRange() const;
void *getPtr() const noexcept;
bool isPlaceholder() const;
bool isMemoryObjectUsedByGraph() const;

detail::AccHostDataT &getAccData();

Expand Down Expand Up @@ -1454,6 +1455,18 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
typename std::iterator_traits<iterator>::difference_type;
using size_type = std::size_t;

/// If creating a host_accessor this checks to see if the underlying memory
/// object is currently in use by a command_graph, and throws if it is.
void throwIfUsedByGraph() const {
#ifndef __SYCL_DEVICE_ONLY__
if (IsHostBuf && AccessorBaseHost::isMemoryObjectUsedByGraph()) {
throw sycl::exception(make_error_code(errc::invalid),
"Host accessors cannot be created for buffers "
"which are currently in use by a command graph.");
}
#endif
}

// The list of accessor constructors with their arguments
// -------+---------+-------+----+-----+--------------
// Dimensions = 0
Expand Down Expand Up @@ -1533,6 +1546,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
detail::getSyclObjImpl(BufferRef).get(), AdjustedDim, sizeof(DataT),
IsPlaceH, BufferRef.OffsetInBytes, BufferRef.IsSubBuffer,
PropertyList) {
throwIfUsedByGraph();
preScreenAccessor(PropertyList);
if (!AccessorBaseHost::isPlaceholder())
addHostAccessorAndWait(AccessorBaseHost::impl.get());
Expand Down Expand Up @@ -1572,6 +1586,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
detail::getSyclObjImpl(BufferRef).get(), AdjustedDim, sizeof(DataT),
IsPlaceH, BufferRef.OffsetInBytes, BufferRef.IsSubBuffer,
PropertyList) {
throwIfUsedByGraph();
preScreenAccessor(PropertyList);
if (!AccessorBaseHost::isPlaceholder())
addHostAccessorAndWait(AccessorBaseHost::impl.get());
Expand Down Expand Up @@ -1607,6 +1622,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
getAdjustedMode(PropertyList),
detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT),
BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) {
throwIfUsedByGraph();
preScreenAccessor(PropertyList);
detail::associateWithHandler(CommandGroupHandler, this, AccessTarget);
initHostAcc();
Expand Down Expand Up @@ -1643,6 +1659,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
getAdjustedMode(PropertyList),
detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT),
BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) {
throwIfUsedByGraph();
preScreenAccessor(PropertyList);
detail::associateWithHandler(CommandGroupHandler, this, AccessTarget);
initHostAcc();
Expand Down Expand Up @@ -1675,6 +1692,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT),
IsPlaceH, BufferRef.OffsetInBytes, BufferRef.IsSubBuffer,
PropertyList) {
throwIfUsedByGraph();
preScreenAccessor(PropertyList);
if (!AccessorBaseHost::isPlaceholder())
addHostAccessorAndWait(AccessorBaseHost::impl.get());
Expand Down Expand Up @@ -1710,6 +1728,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT),
IsPlaceH, BufferRef.OffsetInBytes, BufferRef.IsSubBuffer,
PropertyList) {
throwIfUsedByGraph();
preScreenAccessor(PropertyList);
if (!AccessorBaseHost::isPlaceholder())
addHostAccessorAndWait(AccessorBaseHost::impl.get());
Expand Down Expand Up @@ -1772,6 +1791,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
getAdjustedMode(PropertyList),
detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT),
BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) {
throwIfUsedByGraph();
preScreenAccessor(PropertyList);
detail::associateWithHandler(CommandGroupHandler, this, AccessTarget);
initHostAcc();
Expand Down Expand Up @@ -1806,6 +1826,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
getAdjustedMode(PropertyList),
detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT),
BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) {
throwIfUsedByGraph();
preScreenAccessor(PropertyList);
initHostAcc();
detail::associateWithHandler(CommandGroupHandler, this, AccessTarget);
Expand Down Expand Up @@ -1981,6 +2002,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
detail::getSyclObjImpl(BufferRef).get(), Dimensions,
sizeof(DataT), IsPlaceH, BufferRef.OffsetInBytes,
BufferRef.IsSubBuffer, PropertyList) {
throwIfUsedByGraph();
preScreenAccessor(PropertyList);
if (!AccessorBaseHost::isPlaceholder())
addHostAccessorAndWait(AccessorBaseHost::impl.get());
Expand Down Expand Up @@ -2023,6 +2045,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
detail::getSyclObjImpl(BufferRef).get(), Dimensions,
sizeof(DataT), IsPlaceH, BufferRef.OffsetInBytes,
BufferRef.IsSubBuffer, PropertyList) {
throwIfUsedByGraph();
preScreenAccessor(PropertyList);
if (!AccessorBaseHost::isPlaceholder())
addHostAccessorAndWait(AccessorBaseHost::impl.get());
Expand Down Expand Up @@ -2094,6 +2117,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
detail::getSyclObjImpl(BufferRef).get(), Dimensions,
sizeof(DataT), BufferRef.OffsetInBytes,
BufferRef.IsSubBuffer, PropertyList) {
throwIfUsedByGraph();
preScreenAccessor(PropertyList);
if (BufferRef.isOutOfBounds(AccessOffset, AccessRange,
BufferRef.get_range()))
Expand Down Expand Up @@ -2136,6 +2160,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor :
detail::getSyclObjImpl(BufferRef).get(), Dimensions,
sizeof(DataT), BufferRef.OffsetInBytes,
BufferRef.IsSubBuffer, PropertyList) {
throwIfUsedByGraph();
preScreenAccessor(PropertyList);
if (BufferRef.isOutOfBounds(AccessOffset, AccessRange,
BufferRef.get_range()))
Expand Down
4 changes: 3 additions & 1 deletion sycl/include/sycl/detail/property_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,10 @@ enum DataLessPropKind {
GraphNoCycleCheck = 19,
QueueSubmissionBatched = 20,
QueueSubmissionImmediate = 21,
GraphAssumeDataOutlivesBuffer = 22,
GraphAssumeBufferOutlivesGraph = 23,
// Indicates the last known dataless property.
LastKnownDataLessPropKind = 21,
LastKnownDataLessPropKind = 23,
// Exceeding 32 may cause ABI breaking change on some of OSes.
DataLessPropKindSize = 32
};
Expand Down
24 changes: 23 additions & 1 deletion sycl/include/sycl/ext/oneapi/experimental/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,35 @@ namespace graph {

/// Property passed to command_graph constructor to disable checking for cycles.
///
/// \todo Cycle check not yet implemented.
class no_cycle_check : public ::sycl::detail::DataLessProperty<
::sycl::detail::GraphNoCycleCheck> {
public:
no_cycle_check() = default;
};

/// Property passed to command_graph constructor to allow buffers to be used
/// with graphs. Passing this property represents a promise from the user that
/// the buffer will outlive any graph that it is used in.
///
class assume_buffer_outlives_graph
: public ::sycl::detail::DataLessProperty<
::sycl::detail::GraphAssumeBufferOutlivesGraph> {
public:
assume_buffer_outlives_graph() = default;
};

/// Property passed to command_graph constructor to allow buffers created with
/// host pointers. Passing this property represents a promise from the user that
/// the host data will outlive the buffer and by extension any graph that it is
/// used in.
///
class assume_data_outlives_buffer
: public ::sycl::detail::DataLessProperty<
::sycl::detail::GraphAssumeDataOutlivesBuffer> {
public:
assume_data_outlives_buffer() = default;
};

} // namespace graph

namespace node {
Expand Down
44 changes: 43 additions & 1 deletion sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,19 @@ class pipe;
}

namespace ext::oneapi::experimental::detail {
// List of sycl experimental extensions
// This enum is used to define the extension from which a function is called.
// This is used in handler::throwIfGraphAssociated() to specify
// the message of the thrown expection.
enum SyclExtensions {
sycl_ext_oneapi_kernel_properties,
sycl_ext_oneapi_enqueue_barrier,
sycl_ext_oneapi_memcpy2d,
sycl_ext_oneapi_device_global
};

class graph_impl;
}
} // namespace ext::oneapi::experimental::detail
namespace detail {

class handler_impl;
Expand Down Expand Up @@ -2085,6 +2096,7 @@ class __SYCL_EXPORT handler {
std::enable_if_t<
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
single_task(PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) {
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
single_task_lambda_impl<KernelName, KernelType, PropertiesT>(Props,
KernelFunc);
}
Expand All @@ -2095,6 +2107,7 @@ class __SYCL_EXPORT handler {
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
parallel_for(range<1> NumWorkItems, PropertiesT Props,
_KERNELFUNCPARAM(KernelFunc)) {
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
parallel_for_lambda_impl<KernelName, KernelType, 1, PropertiesT>(
NumWorkItems, Props, std::move(KernelFunc));
}
Expand All @@ -2105,6 +2118,7 @@ class __SYCL_EXPORT handler {
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
parallel_for(range<2> NumWorkItems, PropertiesT Props,
_KERNELFUNCPARAM(KernelFunc)) {
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
parallel_for_lambda_impl<KernelName, KernelType, 2, PropertiesT>(
NumWorkItems, Props, std::move(KernelFunc));
}
Expand All @@ -2115,6 +2129,7 @@ class __SYCL_EXPORT handler {
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
parallel_for(range<3> NumWorkItems, PropertiesT Props,
_KERNELFUNCPARAM(KernelFunc)) {
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
parallel_for_lambda_impl<KernelName, KernelType, 3, PropertiesT>(
NumWorkItems, Props, std::move(KernelFunc));
}
Expand All @@ -2125,6 +2140,7 @@ class __SYCL_EXPORT handler {
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
parallel_for(nd_range<Dims> Range, PropertiesT Properties,
_KERNELFUNCPARAM(KernelFunc)) {
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
parallel_for_impl<KernelName>(Range, Properties, std::move(KernelFunc));
}

Expand All @@ -2137,6 +2153,7 @@ class __SYCL_EXPORT handler {
detail::AreAllButLastReductions<RestT...>::value &&
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
parallel_for(range<1> Range, PropertiesT Properties, RestT &&...Rest) {
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
detail::reduction_parallel_for<KernelName>(*this, Range, Properties,
std::forward<RestT>(Rest)...);
}
Expand All @@ -2148,6 +2165,7 @@ class __SYCL_EXPORT handler {
detail::AreAllButLastReductions<RestT...>::value &&
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
parallel_for(range<2> Range, PropertiesT Properties, RestT &&...Rest) {
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
detail::reduction_parallel_for<KernelName>(*this, Range, Properties,
std::forward<RestT>(Rest)...);
}
Expand All @@ -2159,6 +2177,7 @@ class __SYCL_EXPORT handler {
detail::AreAllButLastReductions<RestT...>::value &&
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
parallel_for(range<3> Range, PropertiesT Properties, RestT &&...Rest) {
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
detail::reduction_parallel_for<KernelName>(*this, Range, Properties,
std::forward<RestT>(Rest)...);
}
Expand Down Expand Up @@ -2213,6 +2232,7 @@ class __SYCL_EXPORT handler {
int Dims, typename PropertiesT>
void parallel_for_work_group(range<Dims> NumWorkGroups, PropertiesT Props,
_KERNELFUNCPARAM(KernelFunc)) {
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
PropertiesT>(NumWorkGroups, Props,
KernelFunc);
Expand All @@ -2223,6 +2243,7 @@ class __SYCL_EXPORT handler {
void parallel_for_work_group(range<Dims> NumWorkGroups,
range<Dims> WorkGroupSize, PropertiesT Props,
_KERNELFUNCPARAM(KernelFunc)) {
throwIfGraphAssociatedAndKernelProperties<PropertiesT>();
parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
PropertiesT>(
NumWorkGroups, WorkGroupSize, Props, KernelFunc);
Expand Down Expand Up @@ -2530,6 +2551,8 @@ class __SYCL_EXPORT handler {
/// until all commands previously submitted to this queue have entered the
/// complete state.
void ext_oneapi_barrier() {
throwIfGraphAssociated<ext::oneapi::experimental::detail::SyclExtensions::
sycl_ext_oneapi_enqueue_barrier>();
throwIfActionIsCreated();
setType(detail::CG::Barrier);
}
Expand Down Expand Up @@ -2615,6 +2638,8 @@ class __SYCL_EXPORT handler {
typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
void ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src,
size_t SrcPitch, size_t Width, size_t Height) {
throwIfGraphAssociated<ext::oneapi::experimental::detail::SyclExtensions::
sycl_ext_oneapi_memcpy2d>();
throwIfActionIsCreated();
if (Width > DestPitch)
throw sycl::exception(sycl::make_error_code(errc::invalid),
Expand Down Expand Up @@ -2793,6 +2818,8 @@ class __SYCL_EXPORT handler {
void memcpy(ext::oneapi::experimental::device_global<T, PropertyListT> &Dest,
const void *Src, size_t NumBytes = sizeof(T),
size_t DestOffset = 0) {
throwIfGraphAssociated<ext::oneapi::experimental::detail::SyclExtensions::
sycl_ext_oneapi_device_global>();
if (sizeof(T) < DestOffset + NumBytes)
throw sycl::exception(make_error_code(errc::invalid),
"Copy to device_global is out of bounds.");
Expand Down Expand Up @@ -2825,6 +2852,8 @@ class __SYCL_EXPORT handler {
memcpy(void *Dest,
const ext::oneapi::experimental::device_global<T, PropertyListT> &Src,
size_t NumBytes = sizeof(T), size_t SrcOffset = 0) {
throwIfGraphAssociated<ext::oneapi::experimental::detail::SyclExtensions::
sycl_ext_oneapi_device_global>();
if (sizeof(T) < SrcOffset + NumBytes)
throw sycl::exception(make_error_code(errc::invalid),
"Copy from device_global is out of bounds.");
Expand Down Expand Up @@ -3346,8 +3375,21 @@ class __SYCL_EXPORT handler {
"handler::require() before it can be used.");
}

template <typename PropertiesT>
std::enable_if_t<
ext::oneapi::experimental::is_property_list<PropertiesT>::value>
throwIfGraphAssociatedAndKernelProperties() {
if (!std::is_same_v<PropertiesT,
ext::oneapi::experimental::detail::empty_properties_t>)
throwIfGraphAssociated<ext::oneapi::experimental::detail::SyclExtensions::
sycl_ext_oneapi_kernel_properties>();
}

// Set value of the gpu cache configuration for the kernel.
void setKernelCacheConfig(sycl::detail::pi::PiKernelCacheConfig);

template <ext::oneapi::experimental::detail::SyclExtensions ExtensionT>
void throwIfGraphAssociated();
};
} // namespace _V1
} // namespace sycl
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue,
uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
ur_event_handle_t *Event) {
// There are issues with immediate command lists so return an error if the
// queue is in that mode.
if (Queue->UsingImmCmdLists) {
return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES;
}

std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
// Use compute engine rather than copy engine
const auto UseCopyEngine = false;
Expand Down
5 changes: 5 additions & 0 deletions sycl/source/accessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include <detail/queue_impl.hpp>
#include <detail/sycl_mem_obj_t.hpp>
#include <sycl/accessor.hpp>

namespace sycl {
Expand Down Expand Up @@ -68,6 +69,10 @@ void *AccessorBaseHost::getMemoryObject() const { return impl->MSYCLMemObj; }

bool AccessorBaseHost::isPlaceholder() const { return impl->MIsPlaceH; }

bool AccessorBaseHost::isMemoryObjectUsedByGraph() const {
return static_cast<detail::SYCLMemObjT *>(impl->MSYCLMemObj)->isUsedInGraph();
}

LocalAccessorBaseHost::LocalAccessorBaseHost(
sycl::range<3> Size, int Dims, int ElemSize,
const property_list &PropertyList) {
Expand Down
5 changes: 5 additions & 0 deletions sycl/source/detail/fusion/fusion_wrapper_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ bool fusion_wrapper_impl::is_in_fusion_mode() const {
}

void fusion_wrapper_impl::start_fusion() {
if (MQueue->getCommandGraph()) {
throw sycl::exception(sycl::make_error_code(errc::invalid),
"SYCL kernel fusion can NOT be started "
"on a queue that is in a recording state.");
}
detail::Scheduler::getInstance().startFusion(MQueue);
}

Expand Down
Loading