Skip to content

Commit

Permalink
Merge branch 'master' into dump-v
Browse files Browse the repository at this point in the history
  • Loading branch information
Lifann committed Aug 22, 2024
2 parents 4f95dd4 + efbf8ce commit 54a6501
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 13 deletions.
18 changes: 9 additions & 9 deletions include/merlin/core_kernels/group_lock_kernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ namespace nv {
namespace merlin {
namespace group_lock {

// Kernel that constructs the three group-lock atomics in place: placement-new
// sets both counters to 0 and the unique flag to false at the addresses given.
//
// update_count / read_count: storage for device-scope atomic<int> counters.
// unique_flag: storage for a device-scope atomic<bool>.
//
// There is no thread-index guard, so every launched thread performs the same
// construction — presumably this is launched with a single thread; confirm at
// the call site.
// NOTE(review): the two consecutive signature lines below look like the
// removed/added pair of a rendered diff (with and without `static`) — confirm.
static __global__ void init_kernel(
__global__ void init_kernel(
cuda::atomic<int, cuda::thread_scope_device>* update_count,
cuda::atomic<int, cuda::thread_scope_device>* read_count,
cuda::atomic<bool, cuda::thread_scope_device>* unique_flag) {
// Placement new: (re)initializes the atomic objects in already-allocated storage.
new (update_count) cuda::atomic<int, cuda::thread_scope_device>{0};
new (read_count) cuda::atomic<int, cuda::thread_scope_device>{0};
new (unique_flag) cuda::atomic<bool, cuda::thread_scope_device>{false};
}
static __global__ void lock_read_kernel(
__global__ void lock_read_kernel(
cuda::atomic<int, cuda::thread_scope_device>* update_count,
cuda::atomic<int, cuda::thread_scope_device>* read_count) {
for (;;) {
Expand All @@ -43,12 +43,12 @@ static __global__ void lock_read_kernel(
}
}

// Kernel that decrements the reader counter by one.
//
// read_count: device-scope atomic<int> reader counter.
//
// The decrement uses relaxed memory ordering, so the atomic itself imposes no
// ordering on surrounding memory operations. No thread-index guard: each
// launched thread decrements once — presumably launched with one thread; confirm.
static __global__ void unlock_read_kernel(
__global__ void unlock_read_kernel(
cuda::atomic<int, cuda::thread_scope_device>* read_count) {
read_count->fetch_sub(1, cuda::std::memory_order_relaxed);
}

static __global__ void lock_update_kernel(
__global__ void lock_update_kernel(
cuda::atomic<int, cuda::thread_scope_device>* update_count,
cuda::atomic<int, cuda::thread_scope_device>* read_count) {
for (;;) {
Expand All @@ -62,12 +62,12 @@ static __global__ void lock_update_kernel(
}
}

// Kernel that decrements the updater counter by one.
//
// update_count: device-scope atomic<int> updater counter.
//
// The decrement uses relaxed memory ordering, so the atomic itself imposes no
// ordering on surrounding memory operations. No thread-index guard: each
// launched thread decrements once — presumably launched with one thread; confirm.
static __global__ void unlock_update_kernel(
__global__ void unlock_update_kernel(
cuda::atomic<int, cuda::thread_scope_device>* update_count) {
update_count->fetch_sub(1, cuda::std::memory_order_relaxed);
}

static __global__ void lock_update_read_kernel(
__global__ void lock_update_read_kernel(
cuda::atomic<int, cuda::thread_scope_device>* update_count,
cuda::atomic<int, cuda::thread_scope_device>* read_count,
cuda::atomic<bool, cuda::thread_scope_device>* unique_flag) {
Expand Down Expand Up @@ -101,7 +101,7 @@ static __global__ void lock_update_read_kernel(
}
}

static __global__ void unlock_update_read_kernel(
__global__ void unlock_update_read_kernel(
cuda::atomic<int, cuda::thread_scope_device>* update_count,
cuda::atomic<int, cuda::thread_scope_device>* read_count,
cuda::atomic<bool, cuda::thread_scope_device>* unique_flag) {
Expand All @@ -110,12 +110,12 @@ static __global__ void unlock_update_read_kernel(
unique_flag->store(false, cuda::std::memory_order_relaxed);
}

// Kernel that snapshots the updater counter into a plain int.
//
// counter: output location written by the kernel; it is dereferenced in device
//          code, so it must be device-accessible (exact allocation kind is not
//          visible here — confirm at the call site).
// update_count: device-scope atomic<int> to read.
//
// The load is relaxed, so the value is only a point-in-time snapshot with no
// ordering guarantees relative to other memory operations.
static __global__ void update_count_kernel(
__global__ void update_count_kernel(
int* counter, cuda::atomic<int, cuda::thread_scope_device>* update_count) {
*counter = update_count->load(cuda::std::memory_order_relaxed);
}

// Kernel that snapshots the reader counter into a plain int.
//
// counter: output location written by the kernel; it is dereferenced in device
//          code, so it must be device-accessible (exact allocation kind is not
//          visible here — confirm at the call site).
// read_count: device-scope atomic<int> to read.
//
// The load is relaxed, so the value is only a point-in-time snapshot with no
// ordering guarantees relative to other memory operations.
static __global__ void read_count_kernel(
__global__ void read_count_kernel(
int* counter, cuda::atomic<int, cuda::thread_scope_device>* read_count) {
*counter = read_count->load(cuda::std::memory_order_relaxed);
}
Expand Down
4 changes: 2 additions & 2 deletions include/merlin/types.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ constexpr uint64_t MAX_SCORE = UINT64_C(0xFFFFFFFFFFFFFFFF);
// Score constant with all bits clear.
constexpr uint64_t EMPTY_SCORE = UINT64_C(0);
// All-ones 64-bit value; presumably a sentinel meaning "no global epoch
// supplied" — confirm against the code that consumes it.
constexpr uint64_t IGNORED_GLOBAL_EPOCH = UINT64_C(0xFFFFFFFFFFFFFFFF);

// Host variable initialized from DEFAULT_EMPTY_KEY; presumably the host-side
// counterpart of the __constant__ EMPTY_KEY below — confirm.
// NOTE(review): the two declarations that follow look like the removed/added
// pair of a rendered diff (with and without `static`) — confirm.
static uint64_t EMPTY_KEY_CPU = DEFAULT_EMPTY_KEY;
uint64_t EMPTY_KEY_CPU = DEFAULT_EMPTY_KEY;
// Reserved key values held in __constant__ memory, each initialized from its
// DEFAULT_* counterpart.
__constant__ uint64_t EMPTY_KEY = DEFAULT_EMPTY_KEY;
__constant__ uint64_t RECLAIM_KEY = DEFAULT_RECLAIM_KEY;
__constant__ uint64_t LOCKED_KEY = DEFAULT_LOCKED_KEY;
Expand All @@ -76,7 +76,7 @@ __forceinline__ __device__ bool IS_VACANT_KEY(K key) {
return (VACANT_KEY_MASK_1 & key) == VACANT_KEY_MASK_2;
}

static cudaError_t init_reserved_keys(int index) {
cudaError_t init_reserved_keys(int index) {
if (index < 1 || index > MAX_RESERVED_KEY_BIT) {
// index = 0 is the default,
// index = 62 is the maximum index that can be set for reserved keys.
Expand Down
3 changes: 1 addition & 2 deletions include/merlin/utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,7 @@ inline void free_pointers(cudaStream_t stream, int n, ...) {
va_end(args);
}

static __global__ void memset64bitKernel(void* devPtr, uint64_t value,
size_t count) {
__global__ void memset64bitKernel(void* devPtr, uint64_t value, size_t count) {
size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < count) {
static_cast<uint64_t*>(devPtr)[idx] = value;
Expand Down

0 comments on commit 54a6501

Please sign in to comment.