Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Do not move arrival_token in barrier::try_wait #499

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions include/cuda/std/detail/libcxx/include/__cuda/barrier.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ friend class _CUDA_VSTD::__barrier_poll_tester_parity;
NV_PROVIDES_SM_90, (
int32_t __ready = 0;
if (!__isClusterShared(&__barrier)) {
return _CUDA_VSTD::__call_try_wait(__barrier, _CUDA_VSTD::move(__token));
return _CUDA_VSTD::__call_try_wait(__barrier, __token);
}
else if (!__isShared(&__barrier)) {
__trap();
Expand All @@ -291,11 +291,11 @@ friend class _CUDA_VSTD::__barrier_poll_tester_parity;
return __ready;
), NV_PROVIDES_SM_80, (
if (!__isShared(&__barrier)) {
return _CUDA_VSTD::__call_try_wait(__barrier, _CUDA_VSTD::move(__token));
return _CUDA_VSTD::__call_try_wait(__barrier, __token);
}
return __test_wait_sm_80(__token);
), NV_ANY_TARGET, (
return _CUDA_VSTD::__call_try_wait(__barrier, _CUDA_VSTD::move(__token));
return _CUDA_VSTD::__call_try_wait(__barrier, __token);
)
)
}
Expand All @@ -304,7 +304,7 @@ friend class _CUDA_VSTD::__barrier_poll_tester_parity;
_LIBCUDACXX_INLINE_VISIBILITY
bool __try_wait(arrival_token __token, _CUDA_VSTD::chrono::nanoseconds __nanosec) const {
if (__nanosec.count() < 1) {
return __try_wait(_CUDA_VSTD::move(__token));
return __try_wait(__token);
}

NV_DISPATCH_TARGET(
Expand Down Expand Up @@ -525,16 +525,16 @@ friend class _CUDA_VSTD::__barrier_poll_tester_parity;

// try_wait_for: timed try-wait expressed as a relative duration.
//
// Converts __dur to _CUDA_VSTD::chrono::nanoseconds with duration_cast and
// returns whatever __try_wait(token, nanoseconds) returns.
//
// NOTE(review): this span is a rendered diff without +/- markers. The two
// adjacent signature lines and the two adjacent return lines look like the
// pre-change form (arrival_token &&, token passed through _CUDA_VSTD::move)
// followed by the proposed form (arrival_token &, token passed directly).
// Only one line of each pair belongs in the real header — confirm which
// against the actual file before relying on either.
template<class _Rep, class _Period>
_LIBCUDACXX_NODISCARD_ATTRIBUTE _LIBCUDACXX_INLINE_VISIBILITY
bool try_wait_for(arrival_token && __token, const _CUDA_VSTD::chrono::duration<_Rep, _Period>& __dur) {
bool try_wait_for(arrival_token & __token, const _CUDA_VSTD::chrono::duration<_Rep, _Period>& __dur) {
// Normalize the caller's duration type to nanoseconds, the unit __try_wait takes.
auto __nanosec = _CUDA_VSTD::chrono::duration_cast<_CUDA_VSTD::chrono::nanoseconds>(__dur);

return __try_wait(_CUDA_VSTD::move(__token), __nanosec);
return __try_wait(__token, __nanosec);
}

// try_wait_until: timed try-wait expressed as an absolute deadline.
//
// Computes the remaining time as (__time - _Clock::now()) and delegates to
// try_wait_for with that duration, returning its result.
//
// NOTE(review): this span is a rendered diff without +/- markers. It shows
// the pre-change signature and return (arrival_token &&, token passed through
// _CUDA_VSTD::move) followed by the proposed signature and return
// (arrival_token &, token passed directly). Only one signature/return pair
// belongs in the real header — confirm which against the actual file.
template<class _Clock, class _Duration>
_LIBCUDACXX_NODISCARD_ATTRIBUTE _LIBCUDACXX_INLINE_VISIBILITY
bool try_wait_until(arrival_token && __token, const _CUDA_VSTD::chrono::time_point<_Clock, _Duration>& __time) {
return try_wait_for(_CUDA_VSTD::move(__token), (__time - _Clock::now()));
bool try_wait_until(arrival_token & __token, const _CUDA_VSTD::chrono::time_point<_Clock, _Duration>& __time) {
return try_wait_for(__token, (__time - _Clock::now()));
}

template<class _Rep, class _Period>
Expand Down