From 7e38b39ad717d30f013699e5002dbee892d743cc Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 8 Jul 2024 14:35:56 +0100 Subject: [PATCH 01/12] Remove deprecated hip APIs. Mostly these APIs were completely redundant already. HIP amd doesn't have have a context concept. Signed-off-by: JackAKirk --- source/adapters/hip/command_buffer.cpp | 2 +- source/adapters/hip/context.cpp | 19 ++++--- source/adapters/hip/context.hpp | 68 +------------------------- source/adapters/hip/device.cpp | 2 +- source/adapters/hip/device.hpp | 33 ++++++++----- source/adapters/hip/enqueue.cpp | 44 ++++++++--------- source/adapters/hip/enqueue_native.cpp | 2 +- source/adapters/hip/event.cpp | 2 +- source/adapters/hip/kernel.cpp | 4 +- source/adapters/hip/memory.cpp | 12 ++--- source/adapters/hip/memory.hpp | 2 +- source/adapters/hip/platform.cpp | 8 ++- source/adapters/hip/program.cpp | 4 +- source/adapters/hip/queue.cpp | 10 ++-- source/adapters/hip/usm.cpp | 4 +- source/adapters/hip/usm_p2p.cpp | 6 +-- 16 files changed, 86 insertions(+), 136 deletions(-) diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index d9438eeb9c..baba125fe4 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -882,7 +882,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( try { std::unique_ptr RetImplEvent{nullptr}; - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); uint32_t StreamToken; ur_stream_guard Guard; hipStream_t HIPStream = hQueue->getNextComputeStream( diff --git a/source/adapters/hip/context.cpp b/source/adapters/hip/context.cpp index dd8312d916..1dbfa1386a 100644 --- a/source/adapters/hip/context.cpp +++ b/source/adapters/hip/context.cpp @@ -32,10 +32,7 @@ ur_context_handle_t_::getOwningURPool(umf_memory_pool_t *UMFPool) { return nullptr; } -/// Create a UR HIP context. -/// -/// By default creates a scoped context and keeps the last active HIP context -/// on top of the HIP context stack. +/// Create a UR context. /// UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( uint32_t DeviceCount, const ur_device_handle_t *phDevices, @@ -44,7 +41,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( std::unique_ptr ContextPtr{nullptr}; try { - // Create a scoped context. + // Create a context. ContextPtr = std::unique_ptr( new ur_context_handle_t_{phDevices, DeviceCount}); *phContext = ContextPtr.release(); @@ -115,8 +112,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { // FIXME: this entry point has been deprecated in the SYCL RT and should be // changed to unsupported once the deprecation period has elapsed - *phNativeContext = reinterpret_cast( - hContext->getDevices()[0]->getNativeContext()); + // The below is extremely dodgy but is the equivalent for what went before + // for continuity: apparently some users may be somehow using this API + // currently, despite it not being well defined. This API should not have been + // implemented in the HIP backend. hipCtx_t is not natively supported by amd + // devices and is meaningless for our purposes; all hipCtx_t APIs were added + // for cuda compatibility only and are deprecated by HIP. + + hipCtx_t *Ctx; + UR_CHECK_ERROR(hipCtxGetCurrent(Ctx)); + *phNativeContext = reinterpret_cast(Ctx); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/hip/context.hpp b/source/adapters/hip/context.hpp index 90366436e2..c797d7459a 100644 --- a/source/adapters/hip/context.hpp +++ b/source/adapters/hip/context.hpp @@ -19,35 +19,6 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData); -/// UR context mapping to a HIP context object. -/// -/// There is no direct mapping between a HIP context and a UR context. -/// The main differences are described below: -/// -/// HIP context vs UR context -/// -/// One of the main differences between the UR API and the HIP driver API is -/// that the second modifies the state of the threads by assigning -/// \c hipCtx_t objects to threads. \c hipCtx_t objects store data associated -/// with a given device and control access to said device from the user side. -/// UR API context are objects that are passed to functions, and not bound -/// to threads. -/// -/// Since the \c ur_context_handle_t can contain multiple devices, and a \c -/// hipCtx_t refers to only a single device, the \c hipCtx_t is more tightly -/// coupled to a \c ur_device_handle_t than a \c ur_context_handle_t. In order -/// to remove some ambiguities about the different semantics of \c -/// \c ur_context_handle_t and native \c hipCtx_t, we access the native \c -/// hipCtx_t solely through the \c ur_device_handle_t class, by using the object -/// \ref ScopedContext, which sets the active device (by setting the active -/// native \c hipCtx_t). -/// -/// Primary vs User-defined \c hipCtx_t -/// -/// HIP has two different types of \c hipCtx_t, the Primary context, which is -/// usable by all threads on a given process for a given device, and the -/// aforementioned custom \c hipCtx_t s. The HIP documentation, confirmed with -/// performance analysis, suggest using the Primary context whenever possible. /// /// Destructor callback /// @@ -76,24 +47,14 @@ struct ur_context_handle_t_ { void operator()() { Function(UserData); } }; - using native_type = hipCtx_t; - std::vector Devices; std::atomic_uint32_t RefCount; ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices) - : Devices{Devs, Devs + NumDevices}, RefCount{1} { - for (auto &Dev : Devices) { - urDeviceRetain(Dev); - } - }; + : Devices{Devs, Devs + NumDevices}, RefCount{1} {}; - ~ur_context_handle_t_() { - for (auto &Dev : Devices) { - urDeviceRelease(Dev); - } - } + ~ur_context_handle_t_() {} void invokeExtendedDeleters() { std::lock_guard Guard(Mutex); @@ -136,28 +97,3 @@ struct ur_context_handle_t_ { std::vector ExtendedDeleters; std::set PoolHandles; }; - -namespace { -/// Scoped context is used across all UR HIP plugin implementation to activate -/// the native Context on the current thread. The ScopedContext does not -/// reinstate the previous context as all operations in the hip adapter that -/// require an active context, set the active context and don't rely on context -/// reinstation -class ScopedContext { -public: - ScopedContext(ur_device_handle_t hDevice) { - hipCtx_t Original{}; - - if (!hDevice) { - throw UR_RESULT_ERROR_INVALID_DEVICE; - } - - hipCtx_t Desired = hDevice->getNativeContext(); - UR_CHECK_ERROR(hipCtxGetCurrent(&Original)); - if (Original != Desired) { - // Sets the desired context as the active one for the thread - UR_CHECK_ERROR(hipCtxSetCurrent(Desired)); - } - } -}; -} // namespace diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index da92fa6a87..54b105f2b8 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -1077,7 +1077,7 @@ ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; ur_event_handle_t_::native_type Event; - ScopedContext Active(hDevice); + ScopedDevice Active(hDevice); if (pDeviceTimestamp) { UR_CHECK_ERROR(hipEventCreateWithFlags(&Event, hipEventDefault)); diff --git a/source/adapters/hip/device.hpp b/source/adapters/hip/device.hpp index 5fd11bfc2f..7a0755e52b 100644 --- a/source/adapters/hip/device.hpp +++ b/source/adapters/hip/device.hpp @@ -24,7 +24,6 @@ struct ur_device_handle_t_ { native_type HIPDevice; std::atomic_uint32_t RefCount; ur_platform_handle_t Platform; - hipCtx_t HIPContext; hipEvent_t EvBase; // HIP event used as base counter uint32_t DeviceIndex; @@ -37,11 +36,10 @@ struct ur_device_handle_t_ { int ConcurrentManagedAccess{0}; public: - ur_device_handle_t_(native_type HipDevice, hipCtx_t Context, - hipEvent_t EvBase, ur_platform_handle_t Platform, - uint32_t DeviceIndex) - : HIPDevice(HipDevice), RefCount{1}, Platform(Platform), - HIPContext(Context), EvBase(EvBase), DeviceIndex(DeviceIndex) { + ur_device_handle_t_(native_type HipDevice, hipEvent_t EvBase, + ur_platform_handle_t Platform, uint32_t DeviceIndex) + : HIPDevice(HipDevice), RefCount{1}, Platform(Platform), EvBase(EvBase), + DeviceIndex(DeviceIndex) { UR_CHECK_ERROR(hipDeviceGetAttribute( &MaxWorkGroupSize, hipDeviceAttributeMaxThreadsPerBlock, HIPDevice)); @@ -61,9 +59,7 @@ struct ur_device_handle_t_ { HIPDevice)); } - ~ur_device_handle_t_() noexcept(false) { - UR_CHECK_ERROR(hipDevicePrimaryCtxRelease(HIPDevice)); - } + ~ur_device_handle_t_() noexcept(false) {} native_type get() const noexcept { return HIPDevice; }; @@ -73,8 +69,6 @@ struct ur_device_handle_t_ { uint64_t getElapsedTime(hipEvent_t) const; - hipCtx_t getNativeContext() const noexcept { return HIPContext; }; - // Returns the index of the device relative to the other devices in the same // platform uint32_t getIndex() const noexcept { return DeviceIndex; }; @@ -97,3 +91,20 @@ struct ur_device_handle_t_ { }; int getAttribute(ur_device_handle_t Device, hipDeviceAttribute_t Attribute); + +namespace { +/// Scoped Device is used across all UR HIP plugin implementation to activate +/// the native Device on the current thread. The ScopedDevice does not +/// reinstate the previous device as all operations in the hip adapter that +/// require an active device, set the active device and don't rely on device +/// reinstation +class ScopedDevice { +public: + ScopedDevice(ur_device_handle_t hDevice) { + if (!hDevice) { + throw UR_RESULT_ERROR_INVALID_DEVICE; + } + hipSetDevice(hDevice->get()); + } +}; +} // namespace diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 99f23a30a4..66eafedf15 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -31,7 +31,7 @@ ur_result_t enqueueEventsWait(ur_queue_handle_t Queue, hipStream_t Stream, auto Result = forLatestEvents( EventWaitList, NumEventsInWaitList, [Stream, Queue](ur_event_handle_t Event) -> ur_result_t { - ScopedContext Active(Queue->getDevice()); + ScopedDevice Active(Queue->getDevice()); if (Event->isCompleted() || Event->getStream() == Stream) { return UR_RESULT_SUCCESS; } else { @@ -164,7 +164,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( hBuffer->setLastQueueWritingToMemObj(hQueue); try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, phEventWaitList)); @@ -220,7 +220,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( } auto Device = hQueue->getDevice(); - ScopedContext Active(Device); + ScopedDevice Active(Device); hipStream_t HIPStream = hQueue->getNextTransferStream(); // Use the default stream if copying from another device @@ -290,7 +290,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( pGlobalWorkSize, pLocalWorkSize, hKernel, HIPFunc, ThreadsPerBlock, BlocksPerGrid)); - ScopedContext Active(Dev); + ScopedDevice Active(Dev); uint32_t StreamToken; ur_stream_guard Guard; @@ -378,7 +378,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST) try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); uint32_t StreamToken; ur_stream_guard Guard; hipStream_t HIPStream = hQueue->getNextComputeStream( @@ -533,7 +533,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( } auto Device = hQueue->getDevice(); - ScopedContext Active(Device); + ScopedDevice Active(Device); hipStream_t HIPStream = hQueue->getNextTransferStream(); UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, @@ -582,7 +582,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( hBuffer->setLastQueueWritingToMemObj(hQueue); try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, phEventWaitList)); @@ -629,7 +629,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( std::unique_ptr RetImplEvent{nullptr}; try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); ur_result_t Result = UR_RESULT_SUCCESS; auto Stream = hQueue->getNextTransferStream(); @@ -680,7 +680,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( std::unique_ptr RetImplEvent{nullptr}; try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, phEventWaitList); @@ -794,7 +794,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( hBuffer->setLastQueueWritingToMemObj(hQueue); try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); auto Stream = hQueue->getNextTransferStream(); if (phEventWaitList) { @@ -941,7 +941,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( } auto Device = hQueue->getDevice(); - ScopedContext Active(Device); + ScopedDevice Active(Device); hipStream_t HIPStream = hQueue->getNextTransferStream(); if (phEventWaitList) { @@ -1001,7 +1001,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( UR_ASSERT(hImage->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); if (phEventWaitList) { @@ -1066,7 +1066,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( ur_result_t Result = UR_RESULT_SUCCESS; try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); if (phEventWaitList) { Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, @@ -1161,7 +1161,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( hQueue, hBuffer, blockingMap, offset, size, MapPtr, numEventsInWaitList, phEventWaitList, phEvent)); } else { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); if (IsPinned) { UR_CHECK_ERROR(urEnqueueEventsWait(hQueue, numEventsInWaitList, @@ -1211,7 +1211,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( hQueue, hMem, true, Map->getMapOffset(), Map->getMapSize(), pMappedPtr, numEventsInWaitList, phEventWaitList, phEvent)); } else { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); if (IsPinned) { UR_CHECK_ERROR(urEnqueueEventsWait(hQueue, numEventsInWaitList, @@ -1241,7 +1241,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( std::unique_ptr EventPtr{nullptr}; try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); uint32_t StreamToken; ur_stream_guard Guard; hipStream_t HIPStream = hQueue->getNextComputeStream( @@ -1299,7 +1299,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( std::unique_ptr EventPtr{nullptr}; try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, phEventWaitList); @@ -1348,7 +1348,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( ur_result_t Result = UR_RESULT_SUCCESS; try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, phEventWaitList); @@ -1425,7 +1425,7 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, #endif try { - ScopedContext Active(Device); + ScopedDevice Active(Device); std::unique_ptr EventPtr{nullptr}; if (phEvent) { @@ -1561,7 +1561,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_result_t Result = UR_RESULT_SUCCESS; try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, phEventWaitList); @@ -1762,7 +1762,7 @@ setKernelParams(const ur_device_handle_t Device, const uint32_t WorkDim, size_t MaxWorkGroupSize = 0; ur_result_t Result = UR_RESULT_SUCCESS; try { - ScopedContext Active(Device); + ScopedDevice Active(Device); { size_t MaxThreadsPerBlock[3] = { static_cast(Device->getMaxBlockDimX()), @@ -1906,7 +1906,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( ur_result_t Result = UR_RESULT_SUCCESS; std::unique_ptr RetImplEvent{nullptr}; try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); uint32_t StreamToken; ur_stream_guard Guard; diff --git a/source/adapters/hip/enqueue_native.cpp b/source/adapters/hip/enqueue_native.cpp index 1ad6bbe2c0..ee171c1725 100644 --- a/source/adapters/hip/enqueue_native.cpp +++ b/source/adapters/hip/enqueue_native.cpp @@ -27,7 +27,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( // sure memory migration happens across devices in the same context try { - ScopedContext ActiveContext(hQueue->getDevice()); + ScopedDevice ActiveDevice(hQueue->getDevice()); ScopedStream ActiveStream(hQueue, NumEventsInWaitList, phEventWaitList); std::unique_ptr RetImplEvent{nullptr}; diff --git a/source/adapters/hip/event.cpp b/source/adapters/hip/event.cpp index dbf1d331ee..cf295b4574 100644 --- a/source/adapters/hip/event.cpp +++ b/source/adapters/hip/event.cpp @@ -155,7 +155,7 @@ urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { UR_ASSERT(numEvents > 0, UR_RESULT_ERROR_INVALID_VALUE); try { - ScopedContext Active(phEventWaitList[0]->getContext()->getDevices()[0]); + ScopedDevice Active(phEventWaitList[0]->getContext()->getDevices()[0]); auto WaitFunc = [](ur_event_handle_t Event) -> ur_result_t { UR_ASSERT(Event, UR_RESULT_ERROR_INVALID_EVENT); diff --git a/source/adapters/hip/kernel.cpp b/source/adapters/hip/kernel.cpp index aa46843963..6dd3a7d2cb 100644 --- a/source/adapters/hip/kernel.cpp +++ b/source/adapters/hip/kernel.cpp @@ -20,7 +20,7 @@ urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, std::unique_ptr RetKernel{nullptr}; try { - ScopedContext Active(hProgram->getDevice()); + ScopedDevice Active(hProgram->getDevice()); hipFunction_t HIPFunc; hipError_t KernelError = @@ -373,7 +373,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( MaxThreadsPerBlock[2] = hQueue->Device->getMaxBlockDimZ(); ur_device_handle_t Device = hQueue->getDevice(); - ScopedContext Active(Device); + ScopedDevice Active(Device); guessLocalWorkSize(Device, ThreadsPerBlock, pGlobalWorkSize, workDim, MaxThreadsPerBlock); diff --git a/source/adapters/hip/memory.cpp b/source/adapters/hip/memory.cpp index 5f06567064..af96e7c0d5 100644 --- a/source/adapters/hip/memory.cpp +++ b/source/adapters/hip/memory.cpp @@ -135,7 +135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( if (PerformInitialCopy && HostPtr) { // Perform initial copy to every device in context for (auto &Device : hContext->getDevices()) { - ScopedContext Active(Device); + ScopedDevice Active(Device); // getPtr may allocate mem if not already allocated const auto &Ptr = std::get(URMemObj->Mem).getPtr(Device); UR_CHECK_ERROR(hipMemcpyHtoD(Ptr, HostPtr, size)); @@ -238,7 +238,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, // FIXME: Only getting info for the first device in the context. This // should be fine in general auto Device = hMemory->getContext()->getDevices()[0]; - ScopedContext Active(Device); + ScopedDevice Active(Device); UrReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet); @@ -375,7 +375,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( if (PerformInitialCopy) { for (const auto &Dev : hContext->getDevices()) { - ScopedContext Active(Dev); + ScopedDevice Active(Dev); hipStream_t Stream{0}; // Use default stream UR_CHECK_ERROR( enqueueMigrateMemoryToDeviceIfNeeded(URMemObj.get(), Dev, Stream)); @@ -401,7 +401,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, UR_ASSERT(hMemory->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); // FIXME: only getting infor for first image in ctx auto Device = hMemory->getContext()->getDevices()[0]; - ScopedContext Active(Device); + ScopedDevice Active(Device); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); try { @@ -474,7 +474,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem, const ur_device_handle_t hDevice) { - ScopedContext Active(hDevice); + ScopedDevice Active(hDevice); auto DeviceIdx = Mem->getContext()->getDeviceIndex(hDevice); ur_lock LockGuard(Mem->MemoryAllocationMutex); @@ -640,7 +640,7 @@ ur_result_t enqueueMigrateMemoryToDeviceIfNeeded( if (Mem->HaveMigratedToDeviceSinceLastWrite[DeviceIdx]) return UR_RESULT_SUCCESS; - ScopedContext Active(hDevice); + ScopedDevice Active(hDevice); if (Mem->isBuffer()) { UR_CHECK_ERROR(enqueueMigrateBufferToDevice(Mem, hDevice, Stream)); } else { diff --git a/source/adapters/hip/memory.hpp b/source/adapters/hip/memory.hpp index 3ec1e8f4e9..b97f9d6b00 100644 --- a/source/adapters/hip/memory.hpp +++ b/source/adapters/hip/memory.hpp @@ -162,7 +162,7 @@ struct BufferMem { UR_CHECK_ERROR(hipHostUnregister(HostPtr)); break; case AllocMode::AllocHostPtr: - UR_CHECK_ERROR(hipFreeHost(HostPtr)); + UR_CHECK_ERROR(hipHostFree(HostPtr)); } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/hip/platform.cpp b/source/adapters/hip/platform.cpp index 8671d70a57..ebfd422a3b 100644 --- a/source/adapters/hip/platform.cpp +++ b/source/adapters/hip/platform.cpp @@ -77,17 +77,15 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, for (auto i = 0u; i < static_cast(NumDevices); ++i) { hipDevice_t Device; UR_CHECK_ERROR(hipDeviceGet(&Device, i)); - hipCtx_t Context; - UR_CHECK_ERROR(hipDevicePrimaryCtxRetain(&Context, Device)); hipEvent_t EvBase; UR_CHECK_ERROR(hipEventCreate(&EvBase)); // Use the default stream to record base event counter UR_CHECK_ERROR(hipEventRecord(EvBase, 0)); - Platform.Devices.emplace_back(new ur_device_handle_t_{ - Device, Context, EvBase, &Platform, i}); + Platform.Devices.emplace_back( + new ur_device_handle_t_{Device, EvBase, &Platform, i}); - ScopedContext Active(Platform.Devices.front().get()); + ScopedDevice Active(Platform.Devices.front().get()); } } catch (const std::bad_alloc &) { // Signal out-of-memory situation diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 902e78aa9d..7f22a9d610 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -313,7 +313,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t, ur_result_t Result = UR_RESULT_SUCCESS; try { - ScopedContext Active(hProgram->getDevice()); + ScopedDevice Active(hProgram->getDevice()); hProgram->buildProgram(pOptions); hProgram->BinaryType = UR_PROGRAM_BINARY_TYPE_EXECUTABLE; @@ -442,7 +442,7 @@ urProgramRelease(ur_program_handle_t hProgram) { ur_result_t Result = UR_RESULT_ERROR_INVALID_PROGRAM; try { - ScopedContext Active(hProgram->getDevice()); + ScopedDevice Active(hProgram->getDevice()); auto HIPModule = hProgram->get(); if (HIPModule) { UR_CHECK_ERROR(hipModuleUnload(HIPModule)); diff --git a/source/adapters/hip/queue.cpp b/source/adapters/hip/queue.cpp index c41bc53a08..427d1c4dce 100644 --- a/source/adapters/hip/queue.cpp +++ b/source/adapters/hip/queue.cpp @@ -135,10 +135,10 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, } if (URFlags & UR_QUEUE_FLAG_PRIORITY_HIGH) { - ScopedContext Active(hDevice); + ScopedDevice Active(hDevice); UR_CHECK_ERROR(hipDeviceGetStreamPriorityRange(nullptr, &Priority)); } else if (URFlags & UR_QUEUE_FLAG_PRIORITY_LOW) { - ScopedContext Active(hDevice); + ScopedDevice Active(hDevice); UR_CHECK_ERROR(hipDeviceGetStreamPriorityRange(&Priority, nullptr)); } } @@ -225,7 +225,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { if (!hQueue->backendHasOwnership()) return UR_RESULT_SUCCESS; - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); hQueue->forEachStream([](hipStream_t S) { UR_CHECK_ERROR(hipStreamSynchronize(S)); @@ -251,7 +251,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { try { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); hQueue->syncStreams([&Result](hipStream_t S) { UR_CHECK_ERROR(hipStreamSynchronize(S)); @@ -283,7 +283,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t) { UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, ur_native_handle_t *phNativeQueue) { - ScopedContext Active(hQueue->getDevice()); + ScopedDevice Active(hQueue->getDevice()); *phNativeQueue = reinterpret_cast(hQueue->getNextComputeStream()); return UR_RESULT_SUCCESS; diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index 79337ba87a..18c8fb404a 100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -108,7 +108,7 @@ ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t, ur_usm_device_mem_flags_t, size_t Size, [[maybe_unused]] uint32_t Alignment) { try { - ScopedContext Active(Device); + ScopedDevice Active(Device); UR_CHECK_ERROR(hipMalloc(ResultPtr, Size)); } catch (ur_result_t Err) { return Err; @@ -124,7 +124,7 @@ ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t, ur_usm_device_mem_flags_t, size_t Size, [[maybe_unused]] uint32_t Alignment) { try { - ScopedContext Active(Device); + ScopedDevice Active(Device); UR_CHECK_ERROR(hipMallocManaged(ResultPtr, Size, hipMemAttachGlobal)); } catch (ur_result_t Err) { return Err; diff --git a/source/adapters/hip/usm_p2p.cpp b/source/adapters/hip/usm_p2p.cpp index d0d25c2092..5a3effd3c8 100644 --- a/source/adapters/hip/usm_p2p.cpp +++ b/source/adapters/hip/usm_p2p.cpp @@ -14,7 +14,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { try { - ScopedContext active(commandDevice); + ScopedDevice active(commandDevice); UR_CHECK_ERROR(hipDeviceEnablePeerAccess(peerDevice->get(), 0)); } catch (ur_result_t err) { return err; @@ -25,7 +25,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { try { - ScopedContext active(commandDevice); + ScopedDevice active(commandDevice); UR_CHECK_ERROR(hipDeviceDisablePeerAccess(peerDevice->get())); } catch (ur_result_t err) { return err; @@ -42,7 +42,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( int value; hipDeviceP2PAttr hipAttr; try { - ScopedContext active(commandDevice); + ScopedDevice active(commandDevice); switch (propName) { case UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED: { hipAttr = hipDevP2PAttrAccessSupported; From c159a8e0780297f2f7f907a541d793b5453492d6 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 8 Jul 2024 15:16:59 +0100 Subject: [PATCH 02/12] Use device ID in hipSetDevice. Signed-off-by: JackAKirk --- source/adapters/hip/device.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/device.hpp b/source/adapters/hip/device.hpp index 7a0755e52b..db32a4d00e 100644 --- a/source/adapters/hip/device.hpp +++ b/source/adapters/hip/device.hpp @@ -104,7 +104,7 @@ class ScopedDevice { if (!hDevice) { throw UR_RESULT_ERROR_INVALID_DEVICE; } - hipSetDevice(hDevice->get()); + hipSetDevice(hDevice->getIndex()); } }; } // namespace From 26e27108fef040aae0e60a4e6d117ac2284b2b1c Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 8 Jul 2024 15:41:20 +0100 Subject: [PATCH 03/12] Add missing UR_CHECK_ERROR. Signed-off-by: JackAKirk --- source/adapters/hip/device.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/device.hpp b/source/adapters/hip/device.hpp index db32a4d00e..f3831a5fb1 100644 --- a/source/adapters/hip/device.hpp +++ b/source/adapters/hip/device.hpp @@ -104,7 +104,7 @@ class ScopedDevice { if (!hDevice) { throw UR_RESULT_ERROR_INVALID_DEVICE; } - hipSetDevice(hDevice->getIndex()); + UR_CHECK_ERROR(hipSetDevice(hDevice->getIndex())); } }; } // namespace From 8e94ada908cb3857b45476c48736dd5c45b90d4e Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 9 Jul 2024 10:39:44 +0100 Subject: [PATCH 04/12] Fix CI warnings. hip -> HIP in comment. Signed-off-by: JackAKirk --- source/adapters/hip/context.cpp | 7 ++++--- source/adapters/hip/device.hpp | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/source/adapters/hip/context.cpp b/source/adapters/hip/context.cpp index 1dbfa1386a..a862d5307b 100644 --- a/source/adapters/hip/context.cpp +++ b/source/adapters/hip/context.cpp @@ -108,8 +108,9 @@ urContextRetain(ur_context_handle_t hContext) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( - ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { +UR_APIEXPORT ur_result_t UR_APICALL +urContextGetNativeHandle([[maybe_unused]] ur_context_handle_t hContext, + ur_native_handle_t *phNativeContext) { // FIXME: this entry point has been deprecated in the SYCL RT and should be // changed to unsupported once the deprecation period has elapsed // The below is extremely dodgy but is the equivalent for what went before @@ -119,7 +120,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( // devices and is meaningless for our purposes; all hipCtx_t APIs were added // for cuda compatibility only and are deprecated by HIP. - hipCtx_t *Ctx; + hipCtx_t *Ctx = nullptr; UR_CHECK_ERROR(hipCtxGetCurrent(Ctx)); *phNativeContext = reinterpret_cast(Ctx); return UR_RESULT_SUCCESS; diff --git a/source/adapters/hip/device.hpp b/source/adapters/hip/device.hpp index f3831a5fb1..bd2b6002e0 100644 --- a/source/adapters/hip/device.hpp +++ b/source/adapters/hip/device.hpp @@ -95,7 +95,7 @@ int getAttribute(ur_device_handle_t Device, hipDeviceAttribute_t Attribute); namespace { /// Scoped Device is used across all UR HIP plugin implementation to activate /// the native Device on the current thread. The ScopedDevice does not -/// reinstate the previous device as all operations in the hip adapter that +/// reinstate the previous device as all operations in the HIP adapter that /// require an active device, set the active device and don't rely on device /// reinstation class ScopedDevice { From b9b1e91f4ece70071e0b7ecf55892c81ad343ac4 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 9 Jul 2024 15:45:16 +0100 Subject: [PATCH 05/12] Remove out of date test cases. Signed-off-by: JackAKirk --- test/adapters/hip/test_context.cpp | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/test/adapters/hip/test_context.cpp b/test/adapters/hip/test_context.cpp index c8dd7ac315..3b384dcbcf 100644 --- a/test/adapters/hip/test_context.cpp +++ b/test/adapters/hip/test_context.cpp @@ -24,14 +24,6 @@ TEST_P(urHipContextTest, ActiveContexts) { // ensure that the queue has the correct context ASSERT_EQ(context, queue->getContext()); - - // check that the current context is the active HIP context - hipCtx_t hipContext = nullptr; - ASSERT_SUCCESS_HIP(hipCtxGetCurrent(&hipContext)); - ASSERT_NE(hipContext, nullptr); - if (context->getDevices().size() == 1) { - ASSERT_EQ(hipContext, context->getDevices()[0]->getNativeContext()); - } } TEST_P(urHipContextTest, ActiveContextsThreads) { @@ -50,7 +42,6 @@ TEST_P(urHipContextTest, ActiveContextsThreads) { bool thread_done = false; auto test_thread = std::thread([&] { - hipCtx_t current = nullptr; { uur::raii::Queue queue = nullptr; ASSERT_SUCCESS( @@ -59,13 +50,6 @@ TEST_P(urHipContextTest, ActiveContextsThreads) { // ensure queue has the correct context ASSERT_EQ(queue->getContext(), context1); - - // check that the first context is now the active HIP context - ASSERT_SUCCESS_HIP(hipCtxGetCurrent(¤t)); - if (context1->getDevices().size() == 1) { - ASSERT_EQ(current, - context1->getDevices()[0]->getNativeContext()); - } } // mark the first set of processing as done and notify the main thread @@ -90,13 +74,6 @@ TEST_P(urHipContextTest, ActiveContextsThreads) { // ensure the queue has the correct context ASSERT_EQ(queue->getContext(), context2); - - // check that the second context is now the active HIP context - ASSERT_SUCCESS_HIP(hipCtxGetCurrent(¤t)); - if (context2->getDevices().size() == 1) { - ASSERT_EQ(current, - context2->getDevices()[0]->getNativeContext()); - } } }); From da8d17ae21ff4c79573fde1fc481a56cc3f34b78 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 9 Jul 2024 17:25:18 +0100 Subject: [PATCH 06/12] Use hipDevicePrimaryCtxRetain, get device "ctxt". Signed-off-by: JackAKirk --- source/adapters/hip/context.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/adapters/hip/context.cpp b/source/adapters/hip/context.cpp index a862d5307b..f1746f9a69 100644 --- a/source/adapters/hip/context.cpp +++ b/source/adapters/hip/context.cpp @@ -108,9 +108,8 @@ urContextRetain(ur_context_handle_t hContext) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urContextGetNativeHandle([[maybe_unused]] ur_context_handle_t hContext, - ur_native_handle_t *phNativeContext) { +UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( + ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { // FIXME: this entry point has been deprecated in the SYCL RT and should be // changed to unsupported once the deprecation period has elapsed // The below is extremely dodgy but is the equivalent for what went before @@ -121,7 +120,8 @@ urContextGetNativeHandle([[maybe_unused]] ur_context_handle_t hContext, // for cuda compatibility only and are deprecated by HIP. hipCtx_t *Ctx = nullptr; - UR_CHECK_ERROR(hipCtxGetCurrent(Ctx)); + UR_CHECK_ERROR( + hipDevicePrimaryCtxRetain(Ctx, hContext->getDevices()[0]->get())); *phNativeContext = reinterpret_cast(Ctx); return UR_RESULT_SUCCESS; } From e0ba66b04ee379cf78a521a8684b3ce8dbdd76bd Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 11 Jul 2024 11:18:01 +0100 Subject: [PATCH 07/12] HIP urContextGetNativeHandle unsupported. Signed-off-by: JackAKirk --- source/adapters/hip/context.cpp | 20 ++++++------------- .../adapters/hip/urContextGetNativeHandle.cpp | 5 ++--- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/source/adapters/hip/context.cpp b/source/adapters/hip/context.cpp index f1746f9a69..369a71b621 100644 --- a/source/adapters/hip/context.cpp +++ b/source/adapters/hip/context.cpp @@ -108,22 +108,14 @@ urContextRetain(ur_context_handle_t hContext) { return UR_RESULT_SUCCESS; } +// urContextGetNativeHandle should not be implemented in the HIP backend. +// hipCtx_t is not natively supported by amd devices, and more importantly does +// not map to ur_context_handle_t in any way. UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { - // FIXME: this entry point has been deprecated in the SYCL RT and should be - // changed to unsupported once the deprecation period has elapsed - // The below is extremely dodgy but is the equivalent for what went before - // for continuity: apparently some users may be somehow using this API - // currently, despite it not being well defined. This API should not have been - // implemented in the HIP backend. hipCtx_t is not natively supported by amd - // devices and is meaningless for our purposes; all hipCtx_t APIs were added - // for cuda compatibility only and are deprecated by HIP. - - hipCtx_t *Ctx = nullptr; - UR_CHECK_ERROR( - hipDevicePrimaryCtxRetain(Ctx, hContext->getDevices()[0]->get())); - *phNativeContext = reinterpret_cast(Ctx); - return UR_RESULT_SUCCESS; + std::ignore = hContext; + std::ignore = phNativeContext; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( diff --git a/test/adapters/hip/urContextGetNativeHandle.cpp b/test/adapters/hip/urContextGetNativeHandle.cpp index 738c75ce95..4d1ec4df2c 100644 --- a/test/adapters/hip/urContextGetNativeHandle.cpp +++ b/test/adapters/hip/urContextGetNativeHandle.cpp @@ -10,7 +10,6 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urHipContextGetNativeHandleTest); TEST_P(urHipContextGetNativeHandleTest, Success) { ur_native_handle_t native_context = 0; - ASSERT_SUCCESS(urContextGetNativeHandle(context, &native_context)); - hipCtx_t hip_context = reinterpret_cast(native_context); - std::ignore = hip_context; + auto status = urContextGetNativeHandle(context, &native_context); + ASSERT_EQ(status, UR_RESULT_ERROR_UNSUPPORTED_FEATURE); } From 9dc6a329f72117bc615cf866f45a094cd26aab23 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 11 Jul 2024 12:37:58 +0100 Subject: [PATCH 08/12] Fix test: mark API success or unsupported. Signed-off-by: JackAKirk --- source/adapters/hip/context.cpp | 5 +++-- .../conformance/context/urContextCreateWithNativeHandle.cpp | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/source/adapters/hip/context.cpp b/source/adapters/hip/context.cpp index 369a71b621..092cb02ce7 100644 --- a/source/adapters/hip/context.cpp +++ b/source/adapters/hip/context.cpp @@ -111,8 +111,9 @@ urContextRetain(ur_context_handle_t hContext) { // urContextGetNativeHandle should not be implemented in the HIP backend. // hipCtx_t is not natively supported by amd devices, and more importantly does // not map to ur_context_handle_t in any way. -UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( - ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { +UR_APIEXPORT ur_result_t UR_APICALL +urContextGetNativeHandle([[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_native_handle_t *phNativeContext) { std::ignore = hContext; std::ignore = phNativeContext; return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; diff --git a/test/conformance/context/urContextCreateWithNativeHandle.cpp b/test/conformance/context/urContextCreateWithNativeHandle.cpp index a2a85524a5..30f445d621 100644 --- a/test/conformance/context/urContextCreateWithNativeHandle.cpp +++ b/test/conformance/context/urContextCreateWithNativeHandle.cpp @@ -76,7 +76,8 @@ TEST_P(urContextCreateWithNativeHandleTest, SuccessWithUnOwnedNativeHandle) { TEST_P(urContextCreateWithNativeHandleTest, InvalidNullPointerDevices) { ur_native_handle_t native_context = 0; - ASSERT_SUCCESS(urContextGetNativeHandle(context, &native_context)); + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urContextGetNativeHandle(context, &native_context)); ur_context_handle_t ctx = nullptr; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, @@ -86,7 +87,8 @@ TEST_P(urContextCreateWithNativeHandleTest, InvalidNullPointerDevices) { TEST_P(urContextCreateWithNativeHandleTest, InvalidNullPointerContext) { ur_native_handle_t native_context = 0; - ASSERT_SUCCESS(urContextGetNativeHandle(context, &native_context)); + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urContextGetNativeHandle(context, &native_context)); ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, urContextCreateWithNativeHandle(native_context, 1, &device, From 9abb51ebe60ef86ccc86838183e9bf15842c5f08 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 12 Jul 2024 10:40:28 +0100 Subject: [PATCH 09/12] Update match: API passes if unsupported. The test should have been checking for success or unsupported, since most backends don't support context interop. Signed-off-by: JackAKirk --- test/conformance/context/context_adapter_native_cpu.match | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/conformance/context/context_adapter_native_cpu.match b/test/conformance/context/context_adapter_native_cpu.match index 7823d661f2..3f80da7c36 100644 --- a/test/conformance/context/context_adapter_native_cpu.match +++ b/test/conformance/context/context_adapter_native_cpu.match @@ -1,3 +1 @@ -urContextCreateWithNativeHandleTest.InvalidNullPointerDevices/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urContextCreateWithNativeHandleTest.InvalidNullPointerContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urContextSetExtendedDeleterTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} From d65ade43a903433ffde2010ba5eb18b13a79aaf7 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 11 Sep 2024 04:11:43 -0700 Subject: [PATCH 10/12] Made requested review changes. Signed-off-by: JackAKirk --- source/adapters/hip/context.hpp | 44 ++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/source/adapters/hip/context.hpp b/source/adapters/hip/context.hpp index c797d7459a..5af95753b8 100644 --- a/source/adapters/hip/context.hpp +++ b/source/adapters/hip/context.hpp @@ -19,6 +19,44 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData); +/// UR context mapping to a HIP context object. +/// +/// There is no direct mapping between a HIP context and a UR context. +/// The main differences are described below: +/// +/// HIP context vs UR context +/// +/// One of the main differences between the UR API and the HIP driver API is +/// that the second modifies the state of the threads by assigning +/// \c hipCtx_t objects to threads. \c hipCtx_t objects store data associated +/// with a given device and control access to said device from the user side. +/// UR API context are objects that are passed to functions, and not bound +/// to threads. +/// +/// Since the \c ur_context_handle_t can contain multiple devices, and a \c +/// hipCtx_t refers to only a single device, the \c hipCtx_t is more tightly +/// coupled to a \c ur_device_handle_t than a \c ur_context_handle_t. In order +/// to remove some ambiguities about the different semantics of \c +/// \c ur_context_handle_t and native \c hipCtx_t, we access the native \c +/// hipCtx_t solely through the \c ur_device_handle_t class, by using the object +/// \ref ScopedContext, which sets the active device (by setting the active +/// native \c hipCtx_t). +/// +/// Primary vs User-defined \c hipCtx_t +/// +/// HIP has two different types of \c hipCtx_t, the Primary context, which is +/// usable by all threads on a given process for a given device, and the +/// aforementioned custom \c hipCtx_t s. The HIP documentation, confirmed with +/// performance analysis, suggest using the Primary context whenever possible. +/// +/// Destructor callback +/// +/// Required to implement CP023, SYCL Extended Context Destruction, +/// the UR Context can store a number of callback functions that will be +/// called upon destruction of the UR Context. +/// See proposal for details. +/// https://github.com/codeplaysoftware/standards-proposals/blob/master/extended-context-destruction/index.md +/// /// /// Destructor callback /// @@ -52,7 +90,11 @@ struct ur_context_handle_t_ { std::atomic_uint32_t RefCount; ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices) - : Devices{Devs, Devs + NumDevices}, RefCount{1} {}; + : Devices{Devs, Devs + NumDevices}, RefCount{1} { + for (auto &Dev : Devices) { + urDeviceRetain(Dev); + } + }; ~ur_context_handle_t_() {} From b8c75516bc67ee795e5842af4c575be547c82a88 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 20 Sep 2024 04:25:22 -0700 Subject: [PATCH 11/12] Update match file. Signed-off-by: JackAKirk --- test/conformance/context/context_adapter_native_cpu.match | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/conformance/context/context_adapter_native_cpu.match b/test/conformance/context/context_adapter_native_cpu.match index 32b479f09e..3f80da7c36 100644 --- a/test/conformance/context/context_adapter_native_cpu.match +++ b/test/conformance/context/context_adapter_native_cpu.match @@ -1,4 +1 @@ -{{NONDETERMINISTIC}} -urContextCreateWithNativeHandleTest.InvalidNullHandleAdapter/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urContextCreateWithNativeHandleTest.InvalidNullPointerContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urContextSetExtendedDeleterTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} From be38e567d458f04e3dd05bbce9b8a2681aa08c05 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 20 Sep 2024 06:44:29 -0700 Subject: [PATCH 12/12] Update match for l0 v2. Signed-off-by: JackAKirk --- test/conformance/context/context_adapter_level_zero_v2.match | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/conformance/context/context_adapter_level_zero_v2.match b/test/conformance/context/context_adapter_level_zero_v2.match index e77c47c0cf..2e6ea80468 100644 --- a/test/conformance/context/context_adapter_level_zero_v2.match +++ b/test/conformance/context/context_adapter_level_zero_v2.match @@ -1,4 +1 @@ -{{NONDETERMINISTIC}} -urContextCreateWithNativeHandleTest.InvalidNullHandleAdapter/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urContextCreateWithNativeHandleTest.InvalidNullPointerContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ urContextSetExtendedDeleterTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__