diff --git a/review/pr-177/README.html b/review/pr-177/README.html index 23b869bf4..67584f26b 100644 --- a/review/pr-177/README.html +++ b/review/pr-177/README.html @@ -435,31 +435,31 @@

On pure HBM mode:

0.50

-

1.153

-

2.580

-

1.706

-

1.798

-

4.111

-

1.801

-

1.083

+

1.093

+

2.470

+

1.478

+

1.770

+

3.726

+

1.447

+

1.075

0.75

-

1.010

-

2.561

-

0.683

-

1.863

-

1.939

-

1.305

-

0.994

+

1.045

+

2.452

+

1.335

+

1.807

+

3.374

+

1.309

+

1.013

1.00

-

0.366

-

2.580

-

0.371

-

1.894

-

0.932

-

0.393

-

0.517

+

0.655

+

2.481

+

0.612

+

1.815

+

1.865

+

0.619

+

0.511

@@ -473,19 +473,19 @@

On pure HBM mode:

0.50

-

2.180

-

18.766

-

3.046

+

2.087

+

12.258

+

3.121

0.75

-

2.179

-

18.873

-

3.006

+

2.045

+

12.447

+

3.094

1.00

-

2.081

-

2.813

-

2.988

+

1.950

+

2.657

+

3.096

@@ -506,31 +506,31 @@

On pure HBM mode:

0.50

-

1.068

-

2.325

-

1.277

-

1.759

-

4.113

-

1.807

-

0.932

+

0.961

+

2.272

+

1.278

+

1.706

+

3.718

+

1.435

+

0.931

0.75

-

0.862

-

2.309

-

0.651

-

1.754

-

1.934

-

1.304

-

0.874

+

0.930

+

2.238

+

1.177

+

1.693

+

3.369

+

1.316

+

0.866

1.00

-

0.360

-

2.349

-

0.346

-

1.825

-

0.926

-

0.373

-

0.470

+

0.646

+

2.321

+

0.572

+

1.783

+

1.873

+

0.618

+

0.469

@@ -544,19 +544,19 @@

On pure HBM mode:

0.50

-

0.701

-

15.185

-

3.013

+

0.692

+

10.784

+

3.100

0.75

-

0.578

-

13.840

-

2.981

+

0.569

+

10.240

+

3.075

1.00

-

0.567

-

0.777

-

2.972

+

0.551

+

0.765

+

3.096

@@ -577,31 +577,31 @@

On pure HBM mode:

0.50

-

0.862

-

2.041

-

0.930

-

1.543

-

4.399

-

1.854

-

0.807

+

0.834

+

1.982

+

1.113

+

1.499

+

3.950

+

1.502

+

0.805

0.75

-

0.662

-

2.022

-

0.579

-

1.541

-

1.983

-

1.299

-

0.771

+

0.801

+

1.951

+

1.033

+

1.493

+

3.545

+

1.359

+

0.773

1.00

-

0.334

-

2.052

-

0.335

-

1.590

-

0.939

-

0.393

-

0.473

+

0.621

+

2.021

+

0.608

+

1.541

+

1.965

+

0.613

+

0.481

@@ -615,19 +615,19 @@

On pure HBM mode:

0.50

-

0.321

-

10.791

-

3.124

+

0.316

+

8.199

+

3.239

0.75

-

0.298

-

10.864

-

3.101

+

0.296

+

8.549

+

3.198

1.00

-

0.293

-

0.393

-

3.096

+

0.288

+

0.395

+

3.225

@@ -651,27 +651,27 @@

On HBM+HMEM hybrid mode:

0.50

0.083

-

0.123

-

0.115

+

0.124

+

0.109

0.131

-

4.015

-

1.804

+

3.705

+

1.435

0.75

-

0.082

-

0.123

+

0.083

+

0.122

0.111

0.129

-

1.911

-

1.136

+

3.221

+

1.274

1.00

-

0.069

-

0.110

-

0.085

-

0.106

-

0.929

-

0.393

+

0.073

+

0.123

+

0.095

+

0.126

+

1.854

+

0.617

@@ -685,19 +685,19 @@

On HBM+HMEM hybrid mode:

0.50

-

0.322

-

10.720

-

3.050

+

0.318

+

8.086

+

3.122

0.75

-

0.299

-

10.587

-

3.020

+

0.294

+

5.549

+

3.111

1.00

-

0.293

-

0.394

-

2.994

+

0.287

+

0.393

+

3.075

@@ -718,27 +718,27 @@

On HBM+HMEM hybrid mode:

0.50

0.049

-

0.073

-

0.048

-

0.070

-

3.530

-

1.730

+

0.069

+

0.049

+

0.069

+

3.484

+

1.370

0.75

0.049

-

0.072

-

0.048

0.069

-

1.849

-

1.261

+

0.049

+

0.069

+

3.116

+

1.242

1.00

-

0.044

-

0.068

-

0.044

-

0.062

-

0.911

-

0.393

+

0.047

+

0.072

+

0.047

+

0.070

+

1.771

+

0.607

@@ -752,19 +752,19 @@

On HBM+HMEM hybrid mode:

0.50

-

0.321

-

11.148

-

2.908

+

0.316

+

8.181

+

3.073

0.75

-

0.299

-

11.269

-

2.898

+

0.293

+

8.950

+

3.052

1.00

-

0.293

-

0.396

-

2.782

+

0.292

+

0.394

+

3.026

diff --git a/review/pr-177/_sources/README.md.txt b/review/pr-177/_sources/README.md.txt index 76a647607..798819e34 100644 --- a/review/pr-177/_sources/README.md.txt +++ b/review/pr-177/_sources/README.md.txt @@ -224,43 +224,43 @@ For Benchmark: | λ | insert_or_assign | find | find_or_insert | assign | find* | find_or_insert* | insert_and_evict | |-----:|-----------------:|-------:|---------------:|-------:|-------:|----------------:|-----------------:| -| 0.50 | 1.153 | 2.580 | 1.706 | 1.798 | 4.111 | 1.801 | 1.083 | -| 0.75 | 1.010 | 2.561 | 0.683 | 1.863 | 1.939 | 1.305 | 0.994 | -| 1.00 | 0.366 | 2.580 | 0.371 | 1.894 | 0.932 | 0.393 | 0.517 | +| 0.50 | 1.093 | 2.470 | 1.478 | 1.770 | 3.726 | 1.447 | 1.075 | +| 0.75 | 1.045 | 2.452 | 1.335 | 1.807 | 3.374 | 1.309 | 1.013 | +| 1.00 | 0.655 | 2.481 | 0.612 | 1.815 | 1.865 | 0.619 | 0.511 | -| λ | export_batch | export_batch_if | contains | -|-----:|-------------:|----------------:|----------:| -| 0.50 | 2.180 | 18.766 | 3.046 | -| 0.75 | 2.179 | 18.873 | 3.006 | -| 1.00 | 2.081 | 2.813 | 2.988 | +| λ | export_batch | export_batch_if | contains | +|-----:|-------------:|----------------:|---------:| +| 0.50 | 2.087 | 12.258 | 3.121 | +| 0.75 | 2.045 | 12.447 | 3.094 | +| 1.00 | 1.950 | 2.657 | 3.096 | * dim = 32, capacity = 128 Million-KV, HBM = 16 GB, HMEM = 0 GB | λ | insert_or_assign | find | find_or_insert | assign | find* | find_or_insert* | insert_and_evict | |-----:|-----------------:|-------:|---------------:|-------:|-------:|----------------:|-----------------:| -| 0.50 | 1.068 | 2.325 | 1.277 | 1.759 | 4.113 | 1.807 | 0.932 | -| 0.75 | 0.862 | 2.309 | 0.651 | 1.754 | 1.934 | 1.304 | 0.874 | -| 1.00 | 0.360 | 2.349 | 0.346 | 1.825 | 0.926 | 0.373 | 0.470 | +| 0.50 | 0.961 | 2.272 | 1.278 | 1.706 | 3.718 | 1.435 | 0.931 | +| 0.75 | 0.930 | 2.238 | 1.177 | 1.693 | 3.369 | 1.316 | 0.866 | +| 1.00 | 0.646 | 2.321 | 0.572 | 1.783 | 1.873 | 0.618 | 0.469 | -| λ | export_batch | export_batch_if | contains | 
-|-----:|-------------:|----------------:|----------:| -| 0.50 | 0.701 | 15.185 | 3.013 | -| 0.75 | 0.578 | 13.840 | 2.981 | -| 1.00 | 0.567 | 0.777 | 2.972 | +| λ | export_batch | export_batch_if | contains | +|-----:|-------------:|----------------:|---------:| +| 0.50 | 0.692 | 10.784 | 3.100 | +| 0.75 | 0.569 | 10.240 | 3.075 | +| 1.00 | 0.551 | 0.765 | 3.096 | * dim = 64, capacity = 64 Million-KV, HBM = 16 GB, HMEM = 0 GB | λ | insert_or_assign | find | find_or_insert | assign | find* | find_or_insert* | insert_and_evict | |-----:|-----------------:|-------:|---------------:|-------:|-------:|----------------:|-----------------:| -| 0.50 | 0.862 | 2.041 | 0.930 | 1.543 | 4.399 | 1.854 | 0.807 | -| 0.75 | 0.662 | 2.022 | 0.579 | 1.541 | 1.983 | 1.299 | 0.771 | -| 1.00 | 0.334 | 2.052 | 0.335 | 1.590 | 0.939 | 0.393 | 0.473 | +| 0.50 | 0.834 | 1.982 | 1.113 | 1.499 | 3.950 | 1.502 | 0.805 | +| 0.75 | 0.801 | 1.951 | 1.033 | 1.493 | 3.545 | 1.359 | 0.773 | +| 1.00 | 0.621 | 2.021 | 0.608 | 1.541 | 1.965 | 0.613 | 0.481 | -| λ | export_batch | export_batch_if | contains | -|-----:|-------------:|----------------:|----------:| -| 0.50 | 0.321 | 10.791 | 3.124 | -| 0.75 | 0.298 | 10.864 | 3.101 | -| 1.00 | 0.293 | 0.393 | 3.096 | +| λ | export_batch | export_batch_if | contains | +|-----:|-------------:|----------------:|---------:| +| 0.50 | 0.316 | 8.199 | 3.239 | +| 0.75 | 0.296 | 8.549 | 3.198 | +| 1.00 | 0.288 | 0.395 | 3.225 | ### On HBM+HMEM hybrid mode: @@ -268,29 +268,29 @@ For Benchmark: | λ | insert_or_assign | find | find_or_insert | assign | find* | find_or_insert* | |-----:|-----------------:|-------:|---------------:|-------:|-------:|----------------:| -| 0.50 | 0.083 | 0.123 | 0.115 | 0.131 | 4.015 | 1.804 | -| 0.75 | 0.082 | 0.123 | 0.111 | 0.129 | 1.911 | 1.136 | -| 1.00 | 0.069 | 0.110 | 0.085 | 0.106 | 0.929 | 0.393 | +| 0.50 | 0.083 | 0.124 | 0.109 | 0.131 | 3.705 | 1.435 | +| 0.75 | 0.083 | 0.122 | 0.111 | 0.129 | 3.221 | 1.274 | +| 1.00 | 
0.073 | 0.123 | 0.095 | 0.126 | 1.854 | 0.617 | -| λ | export_batch | export_batch_if | contains | -|-----:|-------------:|----------------:|----------:| -| 0.50 | 0.322 | 10.720 | 3.050 | -| 0.75 | 0.299 | 10.587 | 3.020 | -| 1.00 | 0.293 | 0.394 | 2.994 | +| λ | export_batch | export_batch_if | contains | +|-----:|-------------:|----------------:|---------:| +| 0.50 | 0.318 | 8.086 | 3.122 | +| 0.75 | 0.294 | 5.549 | 3.111 | +| 1.00 | 0.287 | 0.393 | 3.075 | * dim = 64, capacity = 512 Million-KV, HBM = 32 GB, HMEM = 96 GB | λ | insert_or_assign | find | find_or_insert | assign | find* | find_or_insert* | |-----:|-----------------:|-------:|---------------:|-------:|-------:|----------------:| -| 0.50 | 0.049 | 0.073 | 0.048 | 0.070 | 3.530 | 1.730 | -| 0.75 | 0.049 | 0.072 | 0.048 | 0.069 | 1.849 | 1.261 | -| 1.00 | 0.044 | 0.068 | 0.044 | 0.062 | 0.911 | 0.393 | - -| λ | export_batch | export_batch_if | contains | -|-----:|-------------:|----------------:|----------:| -| 0.50 | 0.321 | 11.148 | 2.908 | -| 0.75 | 0.299 | 11.269 | 2.898 | -| 1.00 | 0.293 | 0.396 | 2.782 | +| 0.50 | 0.049 | 0.069 | 0.049 | 0.069 | 3.484 | 1.370 | +| 0.75 | 0.049 | 0.069 | 0.049 | 0.069 | 3.116 | 1.242 | +| 1.00 | 0.047 | 0.072 | 0.047 | 0.070 | 1.771 | 0.607 | + +| λ | export_batch | export_batch_if | contains | +|-----:|-------------:|----------------:|---------:| +| 0.50 | 0.316 | 8.181 | 3.073 | +| 0.75 | 0.293 | 8.950 | 3.052 | +| 1.00 | 0.292 | 0.394 | 3.026 | ### Support and Feedback: diff --git a/review/pr-177/_sources/api/program_listing_file_merlin_hashtable.cuh.rst.txt b/review/pr-177/_sources/api/program_listing_file_merlin_hashtable.cuh.rst.txt index ec63cb12e..44e7a9eeb 100644 --- a/review/pr-177/_sources/api/program_listing_file_merlin_hashtable.cuh.rst.txt +++ b/review/pr-177/_sources/api/program_listing_file_merlin_hashtable.cuh.rst.txt @@ -208,7 +208,7 @@ Program Listing for File merlin_hashtable.cuh const key_type* keys, // (n) const value_type* values, // (n, 
DIM) const score_type* scores = nullptr, // (n) - cudaStream_t stream = 0, + cudaStream_t stream = 0, bool unique_key = true, bool ignore_evict_strategy = false) { if (n == 0) { return; @@ -226,8 +226,6 @@ Program Listing for File merlin_hashtable.cuh insert_unique_lock lock(mutex_, stream); if (is_fast_mode()) { - using Selector = SelectUpsertKernelWithIO; static thread_local int step_counter = 0; static thread_local float load_factor = 0.0; @@ -235,20 +233,55 @@ Program Listing for File merlin_hashtable.cuh load_factor = fast_load_factor(0, stream, false); } - Selector::execute_kernel( - load_factor, options_.block_size, options_.max_bucket_size, - table_->buckets_num, options_.dim, stream, n, d_table_, - table_->buckets, keys, reinterpret_cast(values), - scores, EvictStrategyParam.global_epoch); + using Selector = KernelSelector_Upsert; + if (Selector::callable(unique_key, + static_cast(options_.max_bucket_size), + static_cast(options_.dim))) { + typename Selector::Params kernelParams( + load_factor, table_->buckets, table_->buckets_size, + table_->buckets_num, + static_cast(options_.max_bucket_size), + static_cast(options_.dim), keys, values, scores, n, + EvictStrategyParam.global_epoch); + Selector::select_kernel(kernelParams, stream); + } else { + using Selector = SelectUpsertKernelWithIO; + Selector::execute_kernel( + load_factor, options_.block_size, options_.max_bucket_size, + table_->buckets_num, options_.dim, stream, n, d_table_, + table_->buckets, keys, reinterpret_cast(values), + scores, EvictStrategyParam.global_epoch); + } } else { - const size_type dev_ws_size{n * (sizeof(value_type*) + sizeof(int))}; + const size_type dev_ws_size{ + n * (sizeof(value_type*) + sizeof(int) + sizeof(key_type*))}; auto dev_ws{dev_mem_pool_->get_workspace<1>(dev_ws_size, stream)}; auto d_dst{dev_ws.get(0)}; - auto d_src_offset{reinterpret_cast(d_dst + n)}; + auto keys_ptr{reinterpret_cast(d_dst + n)}; + auto d_src_offset{reinterpret_cast(keys_ptr + n)}; 
CUDA_CHECK(cudaMemsetAsync(d_dst, 0, dev_ws_size, stream)); - { + constexpr uint32_t MinBucketCapacityFilter = + sizeof(VecD_Load) / sizeof(D); + + bool filter_condition = + unique_key && options_.max_bucket_size >= MinBucketCapacityFilter && + !options_.io_by_cpu; + + if (filter_condition) { + constexpr uint32_t BLOCK_SIZE = 128; + + upsert_kernel_lock_key_hybrid + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + table_->buckets, table_->buckets_size, table_->buckets_num, + options_.max_bucket_size, options_.dim, keys, d_dst, scores, + keys_ptr, d_src_offset, n, EvictStrategyParam.global_epoch); + + } else { const size_t block_size = options_.block_size; const size_t N = n * TILE_SIZE; const size_t grid_size = SAFE_GET_GRID_SIZE(N, block_size); @@ -269,7 +302,16 @@ Program Listing for File merlin_hashtable.cuh d_src_offset_ptr, thrust::less()); } - if (options_.io_by_cpu) { + if (filter_condition) { + const size_t block_size = options_.io_block_size; + const size_t N = n * dim(); + const size_t grid_size = SAFE_GET_GRID_SIZE(N, block_size); + + write_kernel_unlock_key + <<>>(values, d_dst, d_src_offset, + dim(), keys, keys_ptr, N); + + } else if (options_.io_by_cpu) { const size_type host_ws_size{dev_ws_size + n * sizeof(value_type) * dim()}; auto host_ws{host_mem_pool_->get_workspace<1>(host_ws_size, stream)}; @@ -502,7 +544,7 @@ Program Listing for File merlin_hashtable.cuh void find_or_insert(const size_type n, const key_type* keys, // (n) value_type* values, // (n * DIM) score_type* scores = nullptr, // (n) - cudaStream_t stream = 0, + cudaStream_t stream = 0, bool unique_key = true, bool ignore_evict_strategy = false) { if (n == 0) { return; @@ -520,31 +562,66 @@ Program Listing for File merlin_hashtable.cuh insert_unique_lock lock(mutex_, stream); if (is_fast_mode()) { - using Selector = - SelectFindOrInsertKernelWithIO; static thread_local int step_counter = 0; static thread_local float load_factor = 0.0; if (((step_counter++) % 
kernel_select_interval_) == 0) { load_factor = fast_load_factor(0, stream, false); } - Selector::execute_kernel(load_factor, options_.block_size, - options_.max_bucket_size, table_->buckets_num, - options_.dim, stream, n, d_table_, - table_->buckets, keys, values, scores, - EvictStrategyParam.global_epoch); + + using Selector = + KernelSelector_FindOrInsert; + if (Selector::callable(unique_key, + static_cast(options_.max_bucket_size), + static_cast(options_.dim))) { + typename Selector::Params kernelParams( + load_factor, table_->buckets, table_->buckets_size, + table_->buckets_num, + static_cast(options_.max_bucket_size), + static_cast(options_.dim), keys, values, scores, n, + EvictStrategyParam.global_epoch); + Selector::select_kernel(kernelParams, stream); + } else { + using Selector = + SelectFindOrInsertKernelWithIO; + Selector::execute_kernel(load_factor, options_.block_size, + options_.max_bucket_size, table_->buckets_num, + options_.dim, stream, n, d_table_, + table_->buckets, keys, values, scores, + EvictStrategyParam.global_epoch); + } } else { - const size_type dev_ws_size{ - n * (sizeof(value_type*) + sizeof(int) + sizeof(bool))}; + const size_type dev_ws_size{n * (sizeof(value_type*) + sizeof(int) + + sizeof(bool) + sizeof(key_type*))}; auto dev_ws{dev_mem_pool_->get_workspace<1>(dev_ws_size, stream)}; auto d_table_value_addrs{dev_ws.get(0)}; - auto param_key_index{reinterpret_cast(d_table_value_addrs + n)}; + auto keys_ptr{reinterpret_cast(d_table_value_addrs + n)}; + auto param_key_index{reinterpret_cast(keys_ptr + n)}; auto founds{reinterpret_cast(param_key_index + n)}; CUDA_CHECK(cudaMemsetAsync(d_table_value_addrs, 0, dev_ws_size, stream)); - { + constexpr uint32_t MinBucketCapacityFilter = + sizeof(VecD_Load) / sizeof(D); + + bool filter_condition = + unique_key && options_.max_bucket_size >= MinBucketCapacityFilter && + !options_.io_by_cpu; + + if (filter_condition) { + constexpr uint32_t BLOCK_SIZE = 128; + + 
find_or_insert_kernel_lock_key_hybrid + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + table_->buckets, table_->buckets_size, table_->buckets_num, + options_.max_bucket_size, options_.dim, keys, + d_table_value_addrs, scores, keys_ptr, param_key_index, founds, + n, EvictStrategyParam.global_epoch); + + } else { const size_t block_size = options_.block_size; const size_t N = n * TILE_SIZE; const size_t grid_size = SAFE_GET_GRID_SIZE(N, block_size); @@ -567,7 +644,17 @@ Program Listing for File merlin_hashtable.cuh thrust::less()); } - if (options_.io_by_cpu) { + if (filter_condition) { + const size_t block_size = options_.io_block_size; + const size_t N = n * dim(); + const size_t grid_size = SAFE_GET_GRID_SIZE(N, block_size); + + read_or_write_kernel_unlock_key + <<>>(d_table_value_addrs, values, + founds, param_key_index, + keys_ptr, keys, dim(), N); + + } else if (options_.io_by_cpu) { const size_type host_ws_size{ dev_ws_size + n * (sizeof(bool) + sizeof(value_type) * dim())}; auto host_ws{host_mem_pool_->get_workspace<1>(host_ws_size, stream)}; @@ -609,7 +696,7 @@ Program Listing for File merlin_hashtable.cuh value_type** values, // (n) bool* founds, // (n) score_type* scores = nullptr, // (n) - cudaStream_t stream = 0, + cudaStream_t stream = 0, bool unique_key = true, bool ignore_evict_strategy = false) { if (n == 0) { return; @@ -626,18 +713,41 @@ Program Listing for File merlin_hashtable.cuh insert_unique_lock lock(mutex_, stream); - using Selector = SelectFindOrInsertPtrKernel; - static thread_local int step_counter = 0; - static thread_local float load_factor = 0.0; + constexpr uint32_t MinBucketCapacityFilter = sizeof(VecD_Load) / sizeof(D); - if (((step_counter++) % kernel_select_interval_) == 0) { - load_factor = fast_load_factor(0, stream, false); + if (unique_key && options_.max_bucket_size >= MinBucketCapacityFilter) { + constexpr uint32_t BLOCK_SIZE = 128U; + + const size_type dev_ws_size{n * sizeof(key_type**)}; + auto 
dev_ws{dev_mem_pool_->get_workspace<1>(dev_ws_size, stream)}; + auto keys_ptr{dev_ws.get(0)}; + CUDA_CHECK(cudaMemsetAsync(keys_ptr, 0, dev_ws_size, stream)); + + find_or_insert_ptr_kernel_lock_key + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + table_->buckets, table_->buckets_size, table_->buckets_num, + options_.max_bucket_size, options_.dim, keys, values, scores, + keys_ptr, n, founds, EvictStrategyParam.global_epoch); + + find_or_insert_ptr_kernel_unlock_key + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + keys, keys_ptr, n); + } else { + using Selector = SelectFindOrInsertPtrKernel; + static thread_local int step_counter = 0; + static thread_local float load_factor = 0.0; + + if (((step_counter++) % kernel_select_interval_) == 0) { + load_factor = fast_load_factor(0, stream, false); + } + Selector::execute_kernel(load_factor, options_.block_size, + options_.max_bucket_size, table_->buckets_num, + options_.dim, stream, n, d_table_, + table_->buckets, keys, values, scores, founds, + EvictStrategyParam.global_epoch); } - Selector::execute_kernel( - load_factor, options_.block_size, options_.max_bucket_size, - table_->buckets_num, options_.dim, stream, n, d_table_, table_->buckets, - keys, values, scores, founds, EvictStrategyParam.global_epoch); CudaCheckError(); } @@ -680,14 +790,33 @@ Program Listing for File merlin_hashtable.cuh EvictStrategyParam.global_epoch); } } else { - const size_type dev_ws_size{n * (sizeof(value_type*) + sizeof(int))}; + const size_type dev_ws_size{ + n * (sizeof(value_type*) + sizeof(key_type) + sizeof(int))}; auto dev_ws{dev_mem_pool_->get_workspace<1>(dev_ws_size, stream)}; auto d_dst{dev_ws.get(0)}; - auto d_src_offset{reinterpret_cast(d_dst + n)}; + auto keys_ptr{reinterpret_cast(d_dst + n)}; + auto d_src_offset{reinterpret_cast(keys_ptr + n)}; CUDA_CHECK(cudaMemsetAsync(d_dst, 0, dev_ws_size, stream)); - { + constexpr uint32_t MinBucketCapacityFilter = + sizeof(VecD_Load) / sizeof(D); + + 
bool filter_condition = + options_.max_bucket_size >= MinBucketCapacityFilter && + !options_.io_by_cpu && unique_key; + + if (filter_condition) { + constexpr uint32_t BLOCK_SIZE = 128U; + + tlp_update_kernel_hybrid + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + table_->buckets, table_->buckets_num, options_.max_bucket_size, + options_.dim, keys, d_dst, scores, keys_ptr, d_src_offset, + EvictStrategyParam.global_epoch, n); + + } else { const size_t block_size = options_.block_size; const size_t N = n * TILE_SIZE; const size_t grid_size = SAFE_GET_GRID_SIZE(N, block_size); @@ -708,7 +837,16 @@ Program Listing for File merlin_hashtable.cuh d_src_offset_ptr, thrust::less()); } - if (options_.io_by_cpu) { + if (filter_condition) { + const size_t block_size = options_.io_block_size; + const size_t N = n * dim(); + const size_t grid_size = SAFE_GET_GRID_SIZE(N, block_size); + + write_kernel_unlock_key + <<>>(values, d_dst, d_src_offset, + dim(), keys, keys_ptr, N); + + } else if (options_.io_by_cpu) { const size_type host_ws_size{dev_ws_size + n * sizeof(value_type) * dim()}; auto host_ws{host_mem_pool_->get_workspace<1>(host_ws_size, stream)}; @@ -822,7 +960,20 @@ Program Listing for File merlin_hashtable.cuh CUDA_CHECK(cudaMemsetAsync(src, 0, dev_ws_size, stream)); - { + constexpr uint32_t MinBucketCapacityFilter = + sizeof(VecD_Load) / sizeof(D); + + bool filter_condition = + options_.max_bucket_size >= MinBucketCapacityFilter; + + if (filter_condition) { + constexpr uint32_t BLOCK_SIZE = 128U; + + tlp_lookup_kernel_hybrid + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + table_->buckets, table_->buckets_num, options_.max_bucket_size, + options_.dim, keys, src, scores, dst_offset, founds, n); + } else { const size_t block_size = options_.block_size; const size_t N = n * TILE_SIZE; const size_t grid_size = SAFE_GET_GRID_SIZE(N, block_size); @@ -859,7 +1010,7 @@ Program Listing for File merlin_hashtable.cuh value_type** values, // 
(n) bool* founds, // (n) score_type* scores = nullptr, // (n) - cudaStream_t stream = 0) const { + cudaStream_t stream = 0, bool unique_key = true) const { if (n == 0) { return; } @@ -868,17 +1019,27 @@ Program Listing for File merlin_hashtable.cuh read_shared_lock lock(mutex_, stream); - using Selector = SelectLookupPtrKernel; - static thread_local int step_counter = 0; - static thread_local float load_factor = 0.0; + constexpr uint32_t MinBucketCapacityFilter = sizeof(VecD_Load) / sizeof(D); + if (unique_key && options_.max_bucket_size >= MinBucketCapacityFilter) { + constexpr uint32_t BLOCK_SIZE = 128U; + tlp_lookup_ptr_kernel_with_filter + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + table_->buckets, table_->buckets_num, options_.max_bucket_size, + options_.dim, keys, values, scores, founds, n); + } else { + using Selector = SelectLookupPtrKernel; + static thread_local int step_counter = 0; + static thread_local float load_factor = 0.0; - if (((step_counter++) % kernel_select_interval_) == 0) { - load_factor = fast_load_factor(0, stream, false); + if (((step_counter++) % kernel_select_interval_) == 0) { + load_factor = fast_load_factor(0, stream, false); + } + + Selector::execute_kernel(load_factor, options_.block_size, + options_.max_bucket_size, table_->buckets_num, + options_.dim, stream, n, d_table_, + table_->buckets, keys, values, scores, founds); } - Selector::execute_kernel(load_factor, options_.block_size, - options_.max_bucket_size, table_->buckets_num, - options_.dim, stream, n, d_table_, table_->buckets, - keys, values, scores, founds); CudaCheckError(); } @@ -1273,7 +1434,7 @@ Program Listing for File merlin_hashtable.cuh } EvictStrategy::set_global_epoch(static_cast(IGNORED_GLOBAL_EPOCH)); - insert_or_assign(count, d_keys, d_values, d_scores, stream, true); + insert_or_assign(count, d_keys, d_values, d_scores, stream, true, true); total_count += count; // Read next batch. 
diff --git a/review/pr-177/api/classnv_1_1merlin_1_1HashTable.html b/review/pr-177/api/classnv_1_1merlin_1_1HashTable.html index aa0bcfb22..938093233 100644 --- a/review/pr-177/api/classnv_1_1merlin_1_1HashTable.html +++ b/review/pr-177/api/classnv_1_1merlin_1_1HashTable.html @@ -175,8 +175,8 @@

Class Documentation -
-inline void insert_or_assign(const size_type n, const key_type *keys, const value_type *values, const score_type *scores = nullptr, cudaStream_t stream = 0, bool ignore_evict_strategy = false)
+
+inline void insert_or_assign(const size_type n, const key_type *keys, const value_type *values, const score_type *scores = nullptr, cudaStream_t stream = 0, bool unique_key = true, bool ignore_evict_strategy = false)

Insert new key-value-score tuples into the hash table. If the key already exists, the values and scores are assigned new values.

If the target bucket is full, the keys with the minimum score will be overwritten by the new key unless the score of the new key is even less than the minimum score of the target bucket.

@@ -188,7 +188,8 @@

Class Documentationuint64_t value. You can specify a value such as the timestamp of the key insertion, the number of key occurrences, or another value to perform a custom eviction strategy.

The scores should be nullptr, when the LRU eviction strategy is applied.

-
  • stream – The CUDA stream that is used to execute the operation.

  • +
  • stream – The CUDA stream that is used to execute the operation.

  • +
  • unique_key – Whether all keys in the same batch are unique.

  • ignore_evict_strategy – A boolean option indicating whether insert_or_assign ignores the evict strategy of the table with the current scores. If true, it does not check whether the scores conform to the evict strategy. If false, it requires that the scores follow the evict strategy of the table.

  • @@ -272,8 +273,8 @@

    Class Documentation -
    -inline void find_or_insert(const size_type n, const key_type *keys, value_type *values, score_type *scores = nullptr, cudaStream_t stream = 0, bool ignore_evict_strategy = false)
    +
    +inline void find_or_insert(const size_type n, const key_type *keys, value_type *values, score_type *scores = nullptr, cudaStream_t stream = 0, bool unique_key = true, bool ignore_evict_strategy = false)

    Searches the hash table for the specified keys. When a key is missing, the value in values and scores will be inserted.

    Parameters
    @@ -284,14 +285,15 @@

    Class Documentationscores is nullptr, the score for each key will not be returned.

  • stream – The CUDA stream that is used to execute the operation.

  • +
  • unique_key – Whether all keys in the same batch are unique.

  • -
    -inline void find_or_insert(const size_type n, const key_type *keys, value_type **values, bool *founds, score_type *scores = nullptr, cudaStream_t stream = 0, bool ignore_evict_strategy = false)
    +
    +inline void find_or_insert(const size_type n, const key_type *keys, value_type **values, bool *founds, score_type *scores = nullptr, cudaStream_t stream = 0, bool unique_key = true, bool ignore_evict_strategy = false)

    Searches the hash table for the specified keys and returns address of the values. When a key is missing, the value in values and scores will be inserted.

    @@ -375,8 +378,8 @@

    Class Documentation -
    -inline void find(const size_type n, const key_type *keys, value_type **values, bool *founds, score_type *scores = nullptr, cudaStream_t stream = 0) const
    +
    +inline void find(const size_type n, const key_type *keys, value_type **values, bool *founds, score_type *scores = nullptr, cudaStream_t stream = 0, bool unique_key = true) const

    Searches the hash table for the specified keys and returns address of the values.

    diff --git a/review/pr-177/api/program_listing_file_merlin_hashtable.cuh.html b/review/pr-177/api/program_listing_file_merlin_hashtable.cuh.html index 6cf75df63..5c00ed5c5 100644 --- a/review/pr-177/api/program_listing_file_merlin_hashtable.cuh.html +++ b/review/pr-177/api/program_listing_file_merlin_hashtable.cuh.html @@ -284,7 +284,7 @@ const key_type* keys, // (n) const value_type* values, // (n, DIM) const score_type* scores = nullptr, // (n) - cudaStream_t stream = 0, + cudaStream_t stream = 0, bool unique_key = true, bool ignore_evict_strategy = false) { if (n == 0) { return; @@ -302,8 +302,6 @@ insert_unique_lock lock(mutex_, stream); if (is_fast_mode()) { - using Selector = SelectUpsertKernelWithIO<key_type, value_type, - score_type, evict_strategy>; static thread_local int step_counter = 0; static thread_local float load_factor = 0.0; @@ -311,20 +309,55 @@ load_factor = fast_load_factor(0, stream, false); } - Selector::execute_kernel( - load_factor, options_.block_size, options_.max_bucket_size, - table_->buckets_num, options_.dim, stream, n, d_table_, - table_->buckets, keys, reinterpret_cast<const value_type*>(values), - scores, EvictStrategyParam.global_epoch); + using Selector = KernelSelector_Upsert<key_type, value_type, score_type, + evict_strategy, ArchTag>; + if (Selector::callable(unique_key, + static_cast<uint32_t>(options_.max_bucket_size), + static_cast<uint32_t>(options_.dim))) { + typename Selector::Params kernelParams( + load_factor, table_->buckets, table_->buckets_size, + table_->buckets_num, + static_cast<uint32_t>(options_.max_bucket_size), + static_cast<uint32_t>(options_.dim), keys, values, scores, n, + EvictStrategyParam.global_epoch); + Selector::select_kernel(kernelParams, stream); + } else { + using Selector = SelectUpsertKernelWithIO<key_type, value_type, + score_type, evict_strategy>; + Selector::execute_kernel( + load_factor, options_.block_size, options_.max_bucket_size, + table_->buckets_num, options_.dim, stream, n, 
d_table_, + table_->buckets, keys, reinterpret_cast<const value_type*>(values), + scores, EvictStrategyParam.global_epoch); + } } else { - const size_type dev_ws_size{n * (sizeof(value_type*) + sizeof(int))}; + const size_type dev_ws_size{ + n * (sizeof(value_type*) + sizeof(int) + sizeof(key_type*))}; auto dev_ws{dev_mem_pool_->get_workspace<1>(dev_ws_size, stream)}; auto d_dst{dev_ws.get<value_type**>(0)}; - auto d_src_offset{reinterpret_cast<int*>(d_dst + n)}; + auto keys_ptr{reinterpret_cast<key_type**>(d_dst + n)}; + auto d_src_offset{reinterpret_cast<int*>(keys_ptr + n)}; CUDA_CHECK(cudaMemsetAsync(d_dst, 0, dev_ws_size, stream)); - { + constexpr uint32_t MinBucketCapacityFilter = + sizeof(VecD_Load) / sizeof(D); + + bool filter_condition = + unique_key && options_.max_bucket_size >= MinBucketCapacityFilter && + !options_.io_by_cpu; + + if (filter_condition) { + constexpr uint32_t BLOCK_SIZE = 128; + + upsert_kernel_lock_key_hybrid<key_type, value_type, score_type, + BLOCK_SIZE, evict_strategy> + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + table_->buckets, table_->buckets_size, table_->buckets_num, + options_.max_bucket_size, options_.dim, keys, d_dst, scores, + keys_ptr, d_src_offset, n, EvictStrategyParam.global_epoch); + + } else { const size_t block_size = options_.block_size; const size_t N = n * TILE_SIZE; const size_t grid_size = SAFE_GET_GRID_SIZE(N, block_size); @@ -345,7 +378,16 @@ d_src_offset_ptr, thrust::less<uintptr_t>()); } - if (options_.io_by_cpu) { + if (filter_condition) { + const size_t block_size = options_.io_block_size; + const size_t N = n * dim(); + const size_t grid_size = SAFE_GET_GRID_SIZE(N, block_size); + + write_kernel_unlock_key<key_type, value_type, score_type> + <<<grid_size, block_size, 0, stream>>>(values, d_dst, d_src_offset, + dim(), keys, keys_ptr, N); + + } else if (options_.io_by_cpu) { const size_type host_ws_size{dev_ws_size + n * sizeof(value_type) * dim()}; auto 
host_ws{host_mem_pool_->get_workspace<1>(host_ws_size, stream)}; @@ -578,7 +620,7 @@ void find_or_insert(const size_type n, const key_type* keys, // (n) value_type* values, // (n * DIM) score_type* scores = nullptr, // (n) - cudaStream_t stream = 0, + cudaStream_t stream = 0, bool unique_key = true, bool ignore_evict_strategy = false) { if (n == 0) { return; @@ -596,31 +638,66 @@ insert_unique_lock lock(mutex_, stream); if (is_fast_mode()) { - using Selector = - SelectFindOrInsertKernelWithIO<key_type, value_type, score_type, - evict_strategy>; static thread_local int step_counter = 0; static thread_local float load_factor = 0.0; if (((step_counter++) % kernel_select_interval_) == 0) { load_factor = fast_load_factor(0, stream, false); } - Selector::execute_kernel(load_factor, options_.block_size, - options_.max_bucket_size, table_->buckets_num, - options_.dim, stream, n, d_table_, - table_->buckets, keys, values, scores, - EvictStrategyParam.global_epoch); + + using Selector = + KernelSelector_FindOrInsert<key_type, value_type, score_type, + evict_strategy, ArchTag>; + if (Selector::callable(unique_key, + static_cast<uint32_t>(options_.max_bucket_size), + static_cast<uint32_t>(options_.dim))) { + typename Selector::Params kernelParams( + load_factor, table_->buckets, table_->buckets_size, + table_->buckets_num, + static_cast<uint32_t>(options_.max_bucket_size), + static_cast<uint32_t>(options_.dim), keys, values, scores, n, + EvictStrategyParam.global_epoch); + Selector::select_kernel(kernelParams, stream); + } else { + using Selector = + SelectFindOrInsertKernelWithIO<key_type, value_type, score_type, + evict_strategy>; + Selector::execute_kernel(load_factor, options_.block_size, + options_.max_bucket_size, table_->buckets_num, + options_.dim, stream, n, d_table_, + table_->buckets, keys, values, scores, + EvictStrategyParam.global_epoch); + } } else { - const size_type dev_ws_size{ - n * (sizeof(value_type*) + sizeof(int) + sizeof(bool))}; + const size_type 
dev_ws_size{n * (sizeof(value_type*) + sizeof(int) + + sizeof(bool) + sizeof(key_type*))}; auto dev_ws{dev_mem_pool_->get_workspace<1>(dev_ws_size, stream)}; auto d_table_value_addrs{dev_ws.get<value_type**>(0)}; - auto param_key_index{reinterpret_cast<int*>(d_table_value_addrs + n)}; + auto keys_ptr{reinterpret_cast<key_type**>(d_table_value_addrs + n)}; + auto param_key_index{reinterpret_cast<int*>(keys_ptr + n)}; auto founds{reinterpret_cast<bool*>(param_key_index + n)}; CUDA_CHECK(cudaMemsetAsync(d_table_value_addrs, 0, dev_ws_size, stream)); - { + constexpr uint32_t MinBucketCapacityFilter = + sizeof(VecD_Load) / sizeof(D); + + bool filter_condition = + unique_key && options_.max_bucket_size >= MinBucketCapacityFilter && + !options_.io_by_cpu; + + if (filter_condition) { + constexpr uint32_t BLOCK_SIZE = 128; + + find_or_insert_kernel_lock_key_hybrid<key_type, value_type, score_type, + BLOCK_SIZE, evict_strategy> + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + table_->buckets, table_->buckets_size, table_->buckets_num, + options_.max_bucket_size, options_.dim, keys, + d_table_value_addrs, scores, keys_ptr, param_key_index, founds, + n, EvictStrategyParam.global_epoch); + + } else { const size_t block_size = options_.block_size; const size_t N = n * TILE_SIZE; const size_t grid_size = SAFE_GET_GRID_SIZE(N, block_size); @@ -643,7 +720,17 @@ thrust::less<uintptr_t>()); } - if (options_.io_by_cpu) { + if (filter_condition) { + const size_t block_size = options_.io_block_size; + const size_t N = n * dim(); + const size_t grid_size = SAFE_GET_GRID_SIZE(N, block_size); + + read_or_write_kernel_unlock_key<key_type, value_type, score_type, V> + <<<grid_size, block_size, 0, stream>>>(d_table_value_addrs, values, + founds, param_key_index, + keys_ptr, keys, dim(), N); + + } else if (options_.io_by_cpu) { const size_type host_ws_size{ dev_ws_size + n * (sizeof(bool) + sizeof(value_type) * dim())}; auto 
host_ws{host_mem_pool_->get_workspace<1>(host_ws_size, stream)}; @@ -685,7 +772,7 @@ value_type** values, // (n) bool* founds, // (n) score_type* scores = nullptr, // (n) - cudaStream_t stream = 0, + cudaStream_t stream = 0, bool unique_key = true, bool ignore_evict_strategy = false) { if (n == 0) { return; @@ -702,18 +789,41 @@ insert_unique_lock lock(mutex_, stream); - using Selector = SelectFindOrInsertPtrKernel<key_type, value_type, - score_type, evict_strategy>; - static thread_local int step_counter = 0; - static thread_local float load_factor = 0.0; + constexpr uint32_t MinBucketCapacityFilter = sizeof(VecD_Load) / sizeof(D); - if (((step_counter++) % kernel_select_interval_) == 0) { - load_factor = fast_load_factor(0, stream, false); + if (unique_key && options_.max_bucket_size >= MinBucketCapacityFilter) { + constexpr uint32_t BLOCK_SIZE = 128U; + + const size_type dev_ws_size{n * sizeof(key_type**)}; + auto dev_ws{dev_mem_pool_->get_workspace<1>(dev_ws_size, stream)}; + auto keys_ptr{dev_ws.get<key_type**>(0)}; + CUDA_CHECK(cudaMemsetAsync(keys_ptr, 0, dev_ws_size, stream)); + + find_or_insert_ptr_kernel_lock_key<key_type, value_type, score_type, + BLOCK_SIZE, evict_strategy> + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + table_->buckets, table_->buckets_size, table_->buckets_num, + options_.max_bucket_size, options_.dim, keys, values, scores, + keys_ptr, n, founds, EvictStrategyParam.global_epoch); + + find_or_insert_ptr_kernel_unlock_key<key_type> + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + keys, keys_ptr, n); + } else { + using Selector = SelectFindOrInsertPtrKernel<key_type, value_type, + score_type, evict_strategy>; + static thread_local int step_counter = 0; + static thread_local float load_factor = 0.0; + + if (((step_counter++) % kernel_select_interval_) == 0) { + load_factor = fast_load_factor(0, stream, false); + } + Selector::execute_kernel(load_factor, options_.block_size, + 
options_.max_bucket_size, table_->buckets_num, + options_.dim, stream, n, d_table_, + table_->buckets, keys, values, scores, founds, + EvictStrategyParam.global_epoch); } - Selector::execute_kernel( - load_factor, options_.block_size, options_.max_bucket_size, - table_->buckets_num, options_.dim, stream, n, d_table_, table_->buckets, - keys, values, scores, founds, EvictStrategyParam.global_epoch); CudaCheckError(); } @@ -756,14 +866,33 @@ EvictStrategyParam.global_epoch); } } else { - const size_type dev_ws_size{n * (sizeof(value_type*) + sizeof(int))}; + const size_type dev_ws_size{ + n * (sizeof(value_type*) + sizeof(key_type) + sizeof(int))}; auto dev_ws{dev_mem_pool_->get_workspace<1>(dev_ws_size, stream)}; auto d_dst{dev_ws.get<value_type**>(0)}; - auto d_src_offset{reinterpret_cast<int*>(d_dst + n)}; + auto keys_ptr{reinterpret_cast<key_type**>(d_dst + n)}; + auto d_src_offset{reinterpret_cast<int*>(keys_ptr + n)}; CUDA_CHECK(cudaMemsetAsync(d_dst, 0, dev_ws_size, stream)); - { + constexpr uint32_t MinBucketCapacityFilter = + sizeof(VecD_Load) / sizeof(D); + + bool filter_condition = + options_.max_bucket_size >= MinBucketCapacityFilter && + !options_.io_by_cpu && unique_key; + + if (filter_condition) { + constexpr uint32_t BLOCK_SIZE = 128U; + + tlp_update_kernel_hybrid<key_type, value_type, score_type, + evict_strategy> + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + table_->buckets, table_->buckets_num, options_.max_bucket_size, + options_.dim, keys, d_dst, scores, keys_ptr, d_src_offset, + EvictStrategyParam.global_epoch, n); + + } else { const size_t block_size = options_.block_size; const size_t N = n * TILE_SIZE; const size_t grid_size = SAFE_GET_GRID_SIZE(N, block_size); @@ -784,7 +913,16 @@ d_src_offset_ptr, thrust::less<uintptr_t>()); } - if (options_.io_by_cpu) { + if (filter_condition) { + const size_t block_size = options_.io_block_size; + const size_t N = n * dim(); + const size_t grid_size = SAFE_GET_GRID_SIZE(N, 
block_size); + + write_kernel_unlock_key<key_type, value_type, score_type> + <<<grid_size, block_size, 0, stream>>>(values, d_dst, d_src_offset, + dim(), keys, keys_ptr, N); + + } else if (options_.io_by_cpu) { const size_type host_ws_size{dev_ws_size + n * sizeof(value_type) * dim()}; auto host_ws{host_mem_pool_->get_workspace<1>(host_ws_size, stream)}; @@ -898,7 +1036,20 @@ CUDA_CHECK(cudaMemsetAsync(src, 0, dev_ws_size, stream)); - { + constexpr uint32_t MinBucketCapacityFilter = + sizeof(VecD_Load) / sizeof(D); + + bool filter_condition = + options_.max_bucket_size >= MinBucketCapacityFilter; + + if (filter_condition) { + constexpr uint32_t BLOCK_SIZE = 128U; + + tlp_lookup_kernel_hybrid<key_type, value_type, score_type> + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + table_->buckets, table_->buckets_num, options_.max_bucket_size, + options_.dim, keys, src, scores, dst_offset, founds, n); + } else { const size_t block_size = options_.block_size; const size_t N = n * TILE_SIZE; const size_t grid_size = SAFE_GET_GRID_SIZE(N, block_size); @@ -935,7 +1086,7 @@ value_type** values, // (n) bool* founds, // (n) score_type* scores = nullptr, // (n) - cudaStream_t stream = 0) const { + cudaStream_t stream = 0, bool unique_key = true) const { if (n == 0) { return; } @@ -944,17 +1095,27 @@ read_shared_lock lock(mutex_, stream); - using Selector = SelectLookupPtrKernel<key_type, value_type, score_type>; - static thread_local int step_counter = 0; - static thread_local float load_factor = 0.0; + constexpr uint32_t MinBucketCapacityFilter = sizeof(VecD_Load) / sizeof(D); + if (unique_key && options_.max_bucket_size >= MinBucketCapacityFilter) { + constexpr uint32_t BLOCK_SIZE = 128U; + tlp_lookup_ptr_kernel_with_filter<key_type, value_type, score_type> + <<<(n + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0, stream>>>( + table_->buckets, table_->buckets_num, options_.max_bucket_size, + options_.dim, keys, values, scores, founds, n); + } else { + using 
Selector = SelectLookupPtrKernel<key_type, value_type, score_type>; + static thread_local int step_counter = 0; + static thread_local float load_factor = 0.0; - if (((step_counter++) % kernel_select_interval_) == 0) { - load_factor = fast_load_factor(0, stream, false); + if (((step_counter++) % kernel_select_interval_) == 0) { + load_factor = fast_load_factor(0, stream, false); + } + + Selector::execute_kernel(load_factor, options_.block_size, + options_.max_bucket_size, table_->buckets_num, + options_.dim, stream, n, d_table_, + table_->buckets, keys, values, scores, founds); } - Selector::execute_kernel(load_factor, options_.block_size, - options_.max_bucket_size, table_->buckets_num, - options_.dim, stream, n, d_table_, table_->buckets, - keys, values, scores, founds); CudaCheckError(); } @@ -1349,7 +1510,7 @@ } EvictStrategy::set_global_epoch(static_cast<S>(IGNORED_GLOBAL_EPOCH)); - insert_or_assign(count, d_keys, d_values, d_scores, stream, true); + insert_or_assign(count, d_keys, d_values, d_scores, stream, true, true); total_count += count; // Read next batch. diff --git a/review/pr-177/genindex.html b/review/pr-177/genindex.html index 3d07eb285..45df30bc4 100644 --- a/review/pr-177/genindex.html +++ b/review/pr-177/genindex.html @@ -141,9 +141,9 @@

    N

  • nv::merlin::HashTable::export_batch_if (C++ function)
  • -
  • nv::merlin::HashTable::find (C++ function), [1] +
  • nv::merlin::HashTable::find (C++ function), [1]
  • -
  • nv::merlin::HashTable::find_or_insert (C++ function), [1] +
  • nv::merlin::HashTable::find_or_insert (C++ function), [1]
  • nv::merlin::HashTable::HashTable (C++ function)
  • @@ -153,7 +153,7 @@

    N

      -
    • nv::merlin::HashTable::insert_or_assign (C++ function) +
    • nv::merlin::HashTable::insert_or_assign (C++ function)
    • nv::merlin::HashTable::key_type (C++ type)
    • diff --git a/review/pr-177/objects.inv b/review/pr-177/objects.inv index f9c12edc7..82095970b 100644 Binary files a/review/pr-177/objects.inv and b/review/pr-177/objects.inv differ diff --git a/review/pr-177/searchindex.js b/review/pr-177/searchindex.js index 3f70cd9f5..97632c2c6 100644 --- a/review/pr-177/searchindex.js +++ b/review/pr-177/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["CONTRIBUTING","README","api/classnv_1_1merlin_1_1HashTable","api/file_merlin_hashtable.cuh","api/index","api/namespace_nv","api/namespace_nv__merlin","api/program_listing_file_merlin_hashtable.cuh","api/structnv_1_1merlin_1_1EvictStrategy","api/structnv_1_1merlin_1_1HashTableOptions","api/typedef_merlin__hashtable_8cuh_1a5001706db6e977358e7f76ad6773703a","api/unabridged_orphan","api/variable_merlin__hashtable_8cuh_1a359fe56354918308560f46cb3136a3da","index"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":3,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":2,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.intersphinx":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["CONTRIBUTING.md","README.md","api/classnv_1_1merlin_1_1HashTable.rst","api/file_merlin_hashtable.cuh.rst","api/index.rst","api/namespace_nv.rst","api/namespace_nv__merlin.rst","api/program_listing_file_merlin_hashtable.cuh.rst","api/structnv_1_1merlin_1_1EvictStrategy.rst","api/structnv_1_1merlin_1_1HashTableOptions.rst","api/typedef_merlin__hashtable_8cuh_1a5001706db6e977358e7f76ad6773703a.rst","api/unabridged_orphan.rst","api/variable_merlin__hashtable_8cuh_1a359fe56354918308560f46cb3136a3da.rst","index.rst"],objects:{"":{"nv::merlin::EraseIfPredict":[10,0,1,"_CPPv4I00EN2nv6merlin14EraseIfPredictE"],"nv::merlin::EraseIfPredict::K":[10,1,1,"_CPPv4I00EN2nv6merlin14EraseIfPredictE"],"nv::merlin::EraseIfPredict::S":[10,1,1,"_CPPv4I00EN2nv6merlin14EraseIfPredictE"],"nv::merlin::EvictStr
ategy":[8,2,1,"_CPPv4N2nv6merlin13EvictStrategyE"],"nv::merlin::EvictStrategy::EvictStrategyEnum":[8,3,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnumE"],"nv::merlin::EvictStrategy::EvictStrategyEnum::kCustomized":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum11kCustomizedE"],"nv::merlin::EvictStrategy::EvictStrategyEnum::kEpochLfu":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum9kEpochLfuE"],"nv::merlin::EvictStrategy::EvictStrategyEnum::kEpochLru":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum9kEpochLruE"],"nv::merlin::EvictStrategy::EvictStrategyEnum::kLfu":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum4kLfuE"],"nv::merlin::EvictStrategy::EvictStrategyEnum::kLru":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum4kLruE"],"nv::merlin::EvictStrategy::kCustomized":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum11kCustomizedE"],"nv::merlin::EvictStrategy::kEpochLfu":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum9kEpochLfuE"],"nv::merlin::EvictStrategy::kEpochLru":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum9kEpochLruE"],"nv::merlin::EvictStrategy::kLfu":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum4kLfuE"],"nv::merlin::EvictStrategy::kLru":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum4kLruE"],"nv::merlin::EvictStrategy::set_global_epoch":[8,5,1,"_CPPv4N2nv6merlin13EvictStrategy16set_global_epochEK8uint64_t"],"nv::merlin::EvictStrategy::set_global_epoch::epoch":[8,6,1,"_CPPv4N2nv6merlin13EvictStrategy16set_global_epochEK8uint64_t"],"nv::merlin::HashTable":[2,2,1,"_CPPv4I000_i0EN2nv6merlin9HashTableE"],"nv::merlin::HashTable::ArchTag":[2,1,1,"_CPPv4I000_i0EN2nv6merlin9HashTableE"],"nv::merlin::HashTable::HashTable":[2,5,1,"_CPPv4N2nv6merlin9HashTable9HashTableEv"],"nv::merlin::HashTable::K":[2,1,1,"_CPPv4I000_i0EN2nv6merlin9HashTableE"],"nv::merlin::HashTable::Pred":[2,0,1,"_CPPv4N2nv6merlin9HashTable4PredE"],"nv::merlin::HashTable::S":
[2,1,1,"_CPPv4I000_i0EN2nv6merlin9HashTableE"],"nv::merlin::HashTable::Strategy":[2,1,1,"_CPPv4I000_i0EN2nv6merlin9HashTableE"],"nv::merlin::HashTable::V":[2,1,1,"_CPPv4I000_i0EN2nv6merlin9HashTableE"],"nv::merlin::HashTable::accum_or_assign":[2,5,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::accum_or_assigns":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::ignore_evict_strategy":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::n":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::scores":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::value_or_deltas":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::allocator_type":[2,0,1,"_CPPv4N2nv6merlin9HashTable14allocator_typeE"],"nv::merlin::HashTable::assign":[2,5,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::assign::keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::mer
lin::HashTable::assign::n":[2,6,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::assign::scores":[2,6,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::assign::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::assign::unique_key":[2,6,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::assign::values":[2,6,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::bucket_count":[2,5,1,"_CPPv4NK2nv6merlin9HashTable12bucket_countEv"],"nv::merlin::HashTable::capacity":[2,5,1,"_CPPv4NK2nv6merlin9HashTable8capacityEv"],"nv::merlin::HashTable::clear":[2,5,1,"_CPPv4N2nv6merlin9HashTable5clearE12cudaStream_t"],"nv::merlin::HashTable::clear::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable5clearE12cudaStream_t"],"nv::merlin::HashTable::contains":[2,5,1,"_CPPv4NK2nv6merlin9HashTable8containsEK9size_typePK8key_typePb12cudaStream_t"],"nv::merlin::HashTable::contains::founds":[2,6,1,"_CPPv4NK2nv6merlin9HashTable8containsEK9size_typePK8key_typePb12cudaStream_t"],"nv::merlin::HashTable::contains::keys":[2,6,1,"_CPPv4NK2nv6merlin9HashTable8containsEK9size_typePK8key_typePb12cudaStream_t"],"nv::merlin::HashTable::contains::n":[2,6,1,"_CPPv4NK2nv6merlin9HashTable8containsEK9size_typePK8key_typePb12cudaStream_t"],"nv::merlin::HashTable::contains::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable8containsEK9size_typePK8key_typePb12cudaStream_t"],"nv::merlin::HashTable::dim":[2,5,1,"_CPPv4NK2nv6merlin9HashTable3dimEv"],"nv::merlin::HashTable::empty":[2,5,1,"_CPPv4NK2nv6merlin9HashTable5emptyE12cudaStream_t"],"nv::merlin::HashTable::empty::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable5emptyE12cudaStream
_t"],"nv::merlin::HashTable::erase":[2,5,1,"_CPPv4N2nv6merlin9HashTable5eraseEK9size_typePK8key_type12cudaStream_t"],"nv::merlin::HashTable::erase::keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable5eraseEK9size_typePK8key_type12cudaStream_t"],"nv::merlin::HashTable::erase::n":[2,6,1,"_CPPv4N2nv6merlin9HashTable5eraseEK9size_typePK8key_type12cudaStream_t"],"nv::merlin::HashTable::erase::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable5eraseEK9size_typePK8key_type12cudaStream_t"],"nv::merlin::HashTable::erase_if":[2,5,1,"_CPPv4II00E0EN2nv6merlin9HashTable8erase_ifE9size_typeRK8key_typeRK10score_type12cudaStream_t"],"nv::merlin::HashTable::erase_if::PredFunctor":[2,1,1,"_CPPv4II00E0EN2nv6merlin9HashTable8erase_ifE9size_typeRK8key_typeRK10score_type12cudaStream_t"],"nv::merlin::HashTable::erase_if::pattern":[2,6,1,"_CPPv4II00E0EN2nv6merlin9HashTable8erase_ifE9size_typeRK8key_typeRK10score_type12cudaStream_t"],"nv::merlin::HashTable::erase_if::stream":[2,6,1,"_CPPv4II00E0EN2nv6merlin9HashTable8erase_ifE9size_typeRK8key_typeRK10score_type12cudaStream_t"],"nv::merlin::HashTable::erase_if::threshold":[2,6,1,"_CPPv4II00E0EN2nv6merlin9HashTable8erase_ifE9size_typeRK8key_typeRK10score_type12cudaStream_t"],"nv::merlin::HashTable::evict_strategy":[2,7,1,"_CPPv4N2nv6merlin9HashTable14evict_strategyE"],"nv::merlin::HashTable::export_batch":[2,5,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch::d_counter":[2,6,1,"_CPPv4NK2nv6merlin9HashTable12export_batchE9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch::keys":[2,6,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch::n":[2,6,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashT
able::export_batch::offset":[2,6,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch::scores":[2,6,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch::values":[2,6,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if":[2,5,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::PredFunctor":[2,1,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::d_counter":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::keys":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::n":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::offset":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin
::HashTable::export_batch_if::pattern":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::scores":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::stream":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::threshold":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::values":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::find":[2,5,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_t"],"nv::merlin::HashTable::find::founds":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_t"],"nv::merlin::HashTable::find::keys":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_t"],"nv::merlin::HashTable::find::n":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_t"],"nv::merlin::HashTable::find::scores":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_t"],"nv::merlin::HashTable::find::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_t"],"nv::merlin::HashTable::find:
:values":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_t"],"nv::merlin::HashTable::find_or_insert":[2,5,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find_or_insert::founds":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find_or_insert::ignore_evict_strategy":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find_or_insert::keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find_or_insert::n":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find_or_insert::scores":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find_or_insert::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find_or_insert::values":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::init":[2,5,1,"_CPPv4N2nv6merlin9HashTable4initERK16HashTableOptionsP14allocator_type"],"nv::merlin::HashTable::init::allocator":[2,6,1,"_CPPv4N2nv6merlin9HashTable4initERK16HashTableOptionsP14allocator_type"],"nv::merlin::HashTable::init::options":[2,6,1,"_CPPv4N2nv6merlin9HashTable4initERK16HashTableOptionsP14allocator_type"],"nv::merlin::HashTable::insert_and_evict":[2,5,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10scor
e_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::d_evicted_counter":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::evicted_keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::evicted_scores":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::evicted_values":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::ignore_evict_strategy":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::n":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::scores":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typ
eP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::unique_key":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::values":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_or_assign":[2,5,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::insert_or_assign::ignore_evict_strategy":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::insert_or_assign::keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::insert_or_assign::n":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::insert_or_assign::scores":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::insert_or_assign::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::insert_or_assign::values":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::key_type":[2,0,1,"_CPPv4N2nv6merlin9HashTable8key_typeE"],"nv::merlin::HashTable::load":[2,5,1,"_CPPv4N2nv6merlin9HashTable4loadEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::load::file":[2,6,1,"_CPPv4N2nv6merlin9HashTable4loadEP10BaseKVFileI1K1V1
SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::load::max_workspace_size":[2,6,1,"_CPPv4N2nv6merlin9HashTable4loadEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::load::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable4loadEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::load_factor":[2,5,1,"_CPPv4NK2nv6merlin9HashTable11load_factorE12cudaStream_t"],"nv::merlin::HashTable::load_factor::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable11load_factorE12cudaStream_t"],"nv::merlin::HashTable::max_bucket_size":[2,5,1,"_CPPv4NK2nv6merlin9HashTable15max_bucket_sizeEv"],"nv::merlin::HashTable::reserve":[2,5,1,"_CPPv4N2nv6merlin9HashTable7reserveEK9size_type12cudaStream_t"],"nv::merlin::HashTable::reserve::new_capacity":[2,6,1,"_CPPv4N2nv6merlin9HashTable7reserveEK9size_type12cudaStream_t"],"nv::merlin::HashTable::reserve::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable7reserveEK9size_type12cudaStream_t"],"nv::merlin::HashTable::save":[2,5,1,"_CPPv4NK2nv6merlin9HashTable4saveEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::save::file":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4saveEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::save::max_workspace_size":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4saveEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::save::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4saveEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::score_type":[2,0,1,"_CPPv4N2nv6merlin9HashTable10score_typeE"],"nv::merlin::HashTable::set_max_capacity":[2,5,1,"_CPPv4N2nv6merlin9HashTable16set_max_capacityE9size_type"],"nv::merlin::HashTable::set_max_capacity::new_max_capacity":[2,6,1,"_CPPv4N2nv6merlin9HashTable16set_max_capacityE9size_type"],"nv::merlin::HashTable::size":[2,5,1,"_CPPv4NK2nv6merlin9HashTable4sizeE12cudaStream_t"],"nv::merlin::HashTable::size::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4sizeE12cudaStream_t"],"nv::merlin::HashTable::size_type":[2,0,1,"_
CPPv4N2nv6merlin9HashTable9size_typeE"],"nv::merlin::HashTable::value_type":[2,0,1,"_CPPv4N2nv6merlin9HashTable10value_typeE"],"nv::merlin::HashTable::~HashTable":[2,5,1,"_CPPv4N2nv6merlin9HashTableD0Ev"],"nv::merlin::HashTableOptions":[9,2,1,"_CPPv4N2nv6merlin16HashTableOptionsE"],"nv::merlin::HashTableOptions::block_size":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions10block_sizeE"],"nv::merlin::HashTableOptions::device_id":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions9device_idE"],"nv::merlin::HashTableOptions::device_memory_pool":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions18device_memory_poolE"],"nv::merlin::HashTableOptions::dim":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions3dimE"],"nv::merlin::HashTableOptions::host_memory_pool":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions16host_memory_poolE"],"nv::merlin::HashTableOptions::init_capacity":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions13init_capacityE"],"nv::merlin::HashTableOptions::io_block_size":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions13io_block_sizeE"],"nv::merlin::HashTableOptions::io_by_cpu":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions9io_by_cpuE"],"nv::merlin::HashTableOptions::max_bucket_size":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions15max_bucket_sizeE"],"nv::merlin::HashTableOptions::max_capacity":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions12max_capacityE"],"nv::merlin::HashTableOptions::max_hbm_for_vectors":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions19max_hbm_for_vectorsE"],"nv::merlin::HashTableOptions::max_load_factor":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions15max_load_factorE"],"nv::merlin::HashTableOptions::use_constant_memory":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions19use_constant_memoryE"],"nv::merlin::thrust_par":[12,7,1,"_CPPv4N2nv6merlin10thrust_parE"]}},objnames:{"0":["cpp","type","C++ type"],"1":["cpp","templateParam","C++ template parameter"],"2":["cpp","class","C++ class"],"3":["cpp","enum","C++ enum"],"4":["cpp","enumerator","C++ enumerator"],"5":["cpp","function","C++ 
function"],"6":["cpp","functionParam","C++ function parameter"],"7":["cpp","member","C++ member"]},objtypes:{"0":"cpp:type","1":"cpp:templateParam","2":"cpp:class","3":"cpp:enum","4":"cpp:enumerator","5":"cpp:function","6":"cpp:functionParam","7":"cpp:member"},terms:{"006":1,"010":1,"013":1,"015":1,"020":1,"022":1,"041":1,"044":1,"046":1,"048":1,"049":1,"050":1,"052":1,"062":1,"068":1,"069":1,"070":1,"072":1,"073":1,"081":1,"082":1,"083":1,"085":1,"096":1,"0x1":2,"0xffff000000000000":10,"0xffffffff":[1,8],"0xfffffffffffffffc":1,"0xfffffffffffffffd":1,"0xfffffffffffffffe":1,"0xffffffffffffffff":1,"101":1,"101600":7,"1024":[1,2,7,9],"1024ul":7,"1048576":1,"106":1,"110":1,"111":1,"113":1,"115":1,"123":1,"124":1,"128":[1,7,9],"129":1,"131":1,"136":1,"148":1,"153":1,"179":1,"180":1,"185":1,"2022":7,"2023":1,"261":1,"269":1,"277":1,"293":1,"298":1,"299":1,"304":1,"305":1,"309":1,"321":1,"322":1,"325":1,"32bit":[1,8],"334":1,"335":1,"346":1,"349":1,"360":1,"366":1,"371":1,"373":1,"393":1,"394":1,"396":1,"399":1,"470":1,"473":1,"512":1,"517":1,"530":1,"541":1,"543":1,"561":1,"567":1,"578":1,"579":1,"580":1,"587":1,"590":1,"651":1,"662":1,"683":1,"701":1,"706":1,"720":1,"730":1,"754":1,"759":1,"766":1,"771":1,"777":1,"782":1,"791":1,"798":1,"801":1,"804":1,"807":1,"80gb":1,"813":1,"825":1,"840":1,"849":1,"854":1,"862":1,"863":1,"864":1,"873":1,"874":1,"894":1,"898":1,"908":1,"911":1,"926":1,"929":1,"930":1,"932":1,"934":1,"939":1,"972":1,"981":1,"983":1,"988":1,"994":1,"\u03bb":1,"boolean":2,"byte":[1,9],"char":[1,7],"class":[1,7,10,11],"const":[2,7,8,10],"default":[1,2,9],"enum":[7,8],"export":[1,2,10],"float":[1,2,7,9],"function":[1,2,8,10],"import":[1,2,8],"int":[1,2,7,9],"new":[1,2,7],"public":[0,1,2,7,8,9],"return":[1,2,7,10],"static":[2,7,8,12],"super":1,"throw":[2,7],"true":[2,7,10],"void":[2,7,8],"while":[2,7,8],For:[1,8,13],The:[0,1,2,7,8,9,10],Use:1,Used:8,__device__:[2,10],__forceinline__:[2,10],a100:1,about:13,abov:0,access:[2,7],accommod:2,accum:2,accum_or_assig
n:[1,2,7],accum_or_assign_kernel:7,accumul:2,actual:2,add:1,added:2,address:[1,2],adher:0,advanc:1,advertis:1,after:[1,8],agre:7,algorithm:2,all:[0,1,2,10],allevi:1,alloc:[2,3,7],allocator_:7,allocator_typ:[2,7],allow:7,alreadi:2,also:[0,1,2],alwai:7,amount:[2,7],ani:[0,1,2,7,8],anoth:2,answer:1,anymor:7,anywai:2,apach:[0,1,7],api:[0,2,8,10],appli:2,applic:7,appropri:2,archtag:[2,7],argc:1,argument:2,argv:1,array_kernel:[3,7],assign:[1,2,7,8],assum:2,atom:[3,7],atomic_bool:7,attempt:7,attribut:2,auto:[7,12],automat:[2,8],avail:13,averag:2,avoid:7,bandwidth:[0,1,13],base:[1,7],basealloc:[2,7],basekvfil:[2,7],bash:1,basi:7,basic:[1,2],batch:[2,7],bazel_build:1,bazelrc:1,becaus:2,been:[1,7],befor:[0,1,2,9],below:1,benchmark_util:1,better:1,billion:1,binari:1,bit:8,block:[1,9],block_siz:[1,7,9],blockquot:8,bool:[1,2,7,9,10],bring:13,bucket:[1,2,7,8,9],bucket_count:[2,7],bucket_max_s:7,bucket_s:7,buckets_num:7,buckets_s:7,bug:[0,1],bypass:1,cach:7,call:[2,7],callabl:7,caller:[1,2,8],can:[0,1,2],capabl:[0,1],capac:[1,2,7,9],certain:[1,2],challeng:1,chang:[0,2,7],check:2,check_evict_strategi:7,chunk:2,clear:[2,7],clear_kernel:7,clock:[1,8],clone:1,close:[1,8],cluster:1,code:1,com:[0,1,13],command:1,comment:1,common:1,commun:1,compact:2,compil:1,complianc:7,comput:1,concept:8,concurr:2,condit:[1,2,7],conduct:[0,2],configur:[2,9],conform:2,confrom:2,consid:2,consider:2,consist:2,constant:[1,8],constexpr:[2,7,12],construct:7,constructor:2,consumpt:1,contain:[1,2,7],containskernelparam:7,containsparam:7,content:7,contribut:1,control:1,copi:7,copyright:7,core_kernel:[3,7],corpor:7,correspond:2,could:[1,8],count:7,counter:7,cpu:[1,9],creat:[1,7],create_t:7,criteria:0,cstdint:[3,7],cuda:[1,2,7,9,12],cuda_check:7,cudacheckerror:7,cudadeviceprop:7,cudadevicesynchron:7,cudaexcept:2,cudagetdevic:7,cudagetdeviceproperti:7,cudamemcpi:7,cudamemcpyasync:7,cudamemcpydefault:7,cudamemcpydevicetohost:7,cudamemcpyhosttodevic:7,cudamemsetasync:7,cudasetdevic:7,cudastream_t:[2,7],cudastreamsyn
chron:7,cuh:[1,2,4,8,9,10,11,12],current:[2,7],custom:[1,2,7,8],customiz:[1,10],d_count:7,d_counter:[2,7],d_dst:7,d_dst_ptr:7,d_evicted_count:[2,7],d_kei:7,d_mask:7,d_offset:7,d_score:7,d_src_offset:7,d_src_offset_ptr:7,d_table_:7,d_table_value_addr:7,d_valu:7,data:[2,7],dcmake_build_typ:1,default_allocator_:7,defaultalloc:7,defer_lock:7,defin:[1,2,8,9,10,12],definit:[1,11],delet:7,delta:[1,2,7],denot:2,dep:7,descript:1,design:[0,1],destroi:2,destroy_t:7,detail:1,dev_mem_pool_:7,dev_w:7,dev_ws_siz:7,develop:13,devic:[1,2,7,8,9],device_clock:[1,8],device_id:[1,7,9],device_memory_pool:[7,9],device_ptr:7,device_vector:[3,7],devicealloc:7,devicememorypool:7,deviceprop:7,differ:[1,8],difficult:1,dim:[1,2,7,9],dimens:[1,2,9],directli:[1,2],discuss:0,distribut:7,divid:2,dlrm:1,doc:1,docker:1,document:[0,7,13],doe:2,doesn:2,don:1,doubl:2,double_capac:7,dsm:1,dst:7,dst_offset:7,dst_offset_ptr:7,dst_ptr:7,dump:[2,7],dump_kernel:7,dump_kernel_shared_memory_s:7,each:[0,1,2,8,9],effici:1,either:7,element:2,els:7,embed:[0,1],empti:[2,7],empty_kei:7,encount:1,end:[0,1],end_i:7,endif:7,engin:1,enough:7,enumer:8,environ:1,epoch:[1,7,8],epochlfu:1,epochlru:1,equal:[1,2,7,8],equival:2,eras:[2,7,10],erase_if:[2,7,10],eraseifpredfunctor:[2,10],eraseifpredict:[2,3,4,6,7],error:2,evalu:1,evcted_scor:2,even:2,evict:[2,7,8],evict_strategi:[2,7],evicted_kei:[2,7],evicted_scor:[2,7],evicted_valu:[2,7],evictstrategi:[1,2,3,4,6,7],evictstrategyenum:[7,8],evictstrategyparam:7,exampl:[2,10],exceed:2,except:[2,7],exclus:7,execut:2,execute_kernel:7,execution_polici:[3,7],exist:2,expect:2,export_batch:[1,2,7],export_batch_if:[1,2,7,10],exportifpredfunctor:[2,10],express:7,extern:1,face:1,factor:[1,2,9],fals:[1,2,7,9],fast_load_factor:7,featur:[0,1],file:[1,2,8,9,10,12],filesystem:2,find:[1,2,7],find_or_insert:[1,2,7],find_or_insert_kernel:7,first:[2,7,8],firstli:1,fix:[0,1],flag:[1,9],flexible_buff:[3,7],float32:1,follow:[0,1,2],format:2,found:[1,2,7],fourth:2,free:[2,7],frequenc:[1,2,8],from:[1,2,8
,10],full:[1,2,8],fulli:[1,8],functor:2,futur:2,gcc:1,gener:[0,1],get:[1,7],get_workspac:7,git:1,github:[0,1,13],global:[1,8],global_epoch:[1,7,8],govern:7,gpu:[0,1,2,7,13],gpu_boolean_mask:7,grab:7,granular:[1,8],grate:[0,1],greater:2,grid_siz:7,group_lock:[3,7],group_shared_mutex:7,growth:1,guarante:2,guid:0,h_dst:7,h_evicted_count:7,h_found:7,h_kei:7,h_param_key_index:7,h_param_valu:7,h_score:7,h_size:7,h_src_offset:7,h_table_value_addr:7,h_valu:7,handl:[1,9],happen:1,has:[1,2],hash:[1,2,9,10],hashtabl:[1,3,4,6,7],hashtableopt:[1,2,3,4,6,7],have:[1,7],hbm:[0,2,7,9,13],header:1,help:1,hierarch:[0,1,2,13],hierarchicalkv:[2,7,9,13],high:[0,1,2,8,13],higher:2,highli:1,hkvtabl:1,hold:7,host:[0,1,2,7,8,9,13],host_mem_pool_:7,host_memory_pool:[7,9],host_w:7,host_ws_siz:7,hostalloc:7,hostmemorypool:7,hot:1,how:0,http:[0,1,7,13],ignor:2,ignore_evict_strategi:[2,7],ignored_global_epoch:7,imag:1,implement:1,impli:7,improv:1,includ:[0,1,7,11],increas:8,increment:[1,8],indic:[1,2,9,10],industri:1,inform:13,init:[1,2,7],init_capac:[1,7,9],initi:[1,2,9],initialized_:7,inlin:[2,7,8],input:[1,8],insert:[1,2,8],insert_and_evict:[1,2,7],insert_or_assign:[1,2,7],insert_unique_lock:7,int64_t:[1,7],integr:0,interest:0,intern:[1,2,8],introduc:[1,2,8],introduct:13,invalid:7,invalid_argu:7,io_block_s:[1,7,9],io_by_cpu:[1,7,9],is_fast_mod:7,is_pow:7,is_pure_hbm:7,is_sam:7,ispow2:7,issu:[0,1],item:[2,10],its:2,june:1,just:2,kcustom:[7,8],keep:[1,2,8],kei:[0,2,7,8,10],kepochlfu:[7,8],kepochlru:[7,8],kernel:[1,7,9],kernel_select_interval_:7,kernelparam:7,kernelselector_upd:7,kernelselector_updatescor:7,kernelselector_upsertandevict:7,key_typ:[1,2,7],keys_not_empti:7,kind:7,klfu:[7,8],klru:[1,2,7,8],kvm_size:7,languag:7,larg:[1,2,7],larger:[1,2,7,8],later:1,law:7,lead:2,learn:1,least:[1,2,8],length:[1,2,9],less:[1,2,7,8],lfu:[1,7,8],librari:[0,1,13],licens:[0,7],lifan:1,like:[1,2,8],limit:[1,3,7],line:7,list:[3,11],load:[1,2,7,9],load_factor:[2,7],local:1,lock:7,lookup:7,lookup_kernel:7,looku
pkernelparam:7,lookupparam:7,low:[1,8],lru:[1,2,7,8],machin:1,mai:[0,7],main:1,maintain:1,maintian:[0,1],make:[1,7],make_uniqu:[1,7],manag:1,match:[1,2],max:[1,7,8,9],max_bucket_s:[1,2,7,9],max_capac:[1,2,7,9],max_hbm_for_vector:[1,7,9],max_load_factor:[1,7,9],max_value_s:7,max_workspace_s:[2,7],maximum:[1,2,9],mean:1,meet:[0,1],member:9,memori:[0,1,2,3,7,9,13],memory_pool:[3,7],memorypool:7,memorypoolopt:[7,9],memorytyp:7,merlin:[0,1,2,3,4,5,7,8,9],merlin_check:7,merlin_hasht:[1,2,4,8,9,10,11,12],merlin_hashtable_benchmark:1,merlin_hashtable_test:1,method:2,might:2,million:1,min:7,minimum:[1,2,8],miss:[1,2],mkdir:1,mode:[7,8],model:1,modifi:[1,2],more:[1,2,8,13],most:1,move:1,must:[1,2,7],mutabl:7,mutex:[3,7],mutex_:7,mutipl:7,n_offset:7,name:1,namespac:[7,11],nanosecond:[1,8],necessari:[0,1],need:[0,1,2,7],need_lock:7,new_capac:[2,7],new_max_capac:[2,7],next:7,noexcept:[2,7],none:7,note:1,noth:2,notic:2,now:2,nullptr:[2,7,8],number:[1,2],numeric_limit:7,nvcr:1,nvidia:[0,7,13],object:2,obtain:7,occur:[2,8],occurr:2,offset:[2,7],old:2,onc:7,one:[1,7],onli:[1,2,7],open:[0,1,13],oper:[1,2,7,10],option:[2,7,9],options_:[2,7],order:1,org:7,origin:1,other:2,otherwis:2,our:13,out:0,output:2,overarch:13,overwrit:1,overwritten:2,pair:2,par:[7,12],par_nosync:7,param:[2,7],param_key_index:7,param_key_index_ptr:7,paramet:[1,2,8],part:[0,1],pattern:[2,7,10],pcie:1,pend:7,per:[1,2],perform:2,permiss:7,pipelin:7,pipeline_max_s:7,pleas:[1,7],plu:7,pointer:1,polici:8,pool:[7,9],posit:2,possibl:7,pow:7,power:[2,7],pragma:7,pred:[2,7],predfunctor:[2,7],predic:2,privat:7,process:[2,7],product:[0,1],program:[3,11],project:[0,13],propos:0,provid:[0,1,8,13],pull:[0,1],pure:7,put:2,py3:1,question:1,rais:2,ram:13,reach:[1,8],reach_max_capacity_:7,read:[1,7],read_kernel:7,read_or_write_by_cpu:7,read_or_write_kernel:7,read_shared_lock:7,real:1,recent:8,recommend:[0,1,2],recsi:[0,1],recurs:1,reduc:[1,2,7],refer:[0,1],regular:1,rehash:[1,2,9],rehash_kernel_for_fast_mod:7,reinterpret_cast:7,rel
eas:[1,2],remov:[2,10],remove_kernel:7,replac:2,repositori:13,request:[0,2,7],requir:[1,2,7],reserv:[1,2,7,9],reset:7,resolut:1,resourc:[1,2],respons:2,restraint:1,result:2,review:[0,1],run:[1,7],runtime_error:7,safe:2,safe_get_block_s:7,safe_get_grid_s:7,same:[1,2],satisfi:2,save:[2,7],scale:1,scenario:1,score:[1,2,7,8,10],score_s:7,score_typ:[1,2,7],scoredata:7,search:[1,2],second:1,see:[7,13],select_kernel:7,selectaccumorassignkernelwithio:7,selectcontainskernel:7,selectfindorinsertkernelwithio:7,selectfindorinsertptrkernel:7,selectlookupkernelwithio:7,selectlookupptrkernel:7,selector:7,selectpipelinecontainskernel:7,selectpipelinelookupkernelwithio:7,selectupdatekernelwithio:7,selectupdatescorekernel:7,selectupsertandevictkernelwithio:7,selectupsertkernelwithio:7,send:0,separ:1,seper:2,serv:1,set:[1,2,7,8],set_global_epoch:[7,8],set_max_capac:[2,7],sever:1,shape:2,share:2,shared_mem_size_:7,shared_mutex:[3,7],shared_s:7,sharedmemperblock:7,should:[1,2,7,8,10],signatur:2,signific:0,simplifi:1,singl:[1,7],situat:2,size:[1,2,7,9],size_ptr:7,size_t:[1,2,7,9],size_typ:[2,7],sizeof:7,slightli:[1,8],slot:2,sm80:[2,7],small:1,smaller:7,softwar:7,some:0,someth:1,sophist:1,sort:[3,7],sort_by_kei:7,sourc:[1,13],special:2,specif:[1,7],specifi:[1,2,7],src:7,src_offset:7,src_offset_ptr:7,src_ptr:7,ssd:2,standard:1,start_i:7,static_assert:7,static_cast:7,statu:[1,2],std:[1,7],step:7,step_count:7,storag:[0,1,2,7],store:[0,1],strategi:[2,7,8],stream:[2,7],struct:[1,2,3,6,7,10],submiss:0,submit:1,successfulli:2,suitabl:1,support:[2,7],sure:7,sync_table_configur:7,system:1,tabl:[1,2,7,9,10],table_:7,table_value_ptr:7,tablecor:7,tableopt:1,target:2,team:[0,1],templat:[3,4,6,7,10],temporari:[2,7],tensorflow:1,terabyt:1,test:[0,1],tf2:1,than:[2,7],thei:[1,2,8],them:1,thi:[0,2,7,10],third:2,thread:[1,2],thread_loc:7,threshold:[2,7,10],through:7,throughput:1,thrust:[3,7,12],thrust_par:[3,4,6,7],thrust_vers:7,tie:7,tile:7,tile_s:7,time:1,timestamp:[2,8],todo:7,togeth:13,too:[1,2,7],tota
l:2,total_count:7,total_s:7,train:1,travers:10,treat:2,tupl:[1,2,7],tuple_s:7,type:[1,2,3,7,8],type_trait:[3,7],typedef:11,typenam:[2,7],uint32_t:7,uint64_t:[1,2,7,8],uintptr_t:7,under:7,uniqu:2,unique_kei:[2,7],unique_ptr:[1,7],unit:[0,1],unless:[2,7],unsaf:[1,2],unsign:7,updat:[1,2],update_kernel:7,update_read_lock:7,update_shared_lock:7,upsert_kernel:7,use:[0,2,7],use_constant_memori:[7,9],used:2,user:[0,1,2],using:[1,2,7,10,13],util:[1,3,7],valid:1,valu:[0,1,2,7,8],value_or_delta:[2,7],value_s:7,value_typ:[2,7],variabl:[1,11],vector:[1,2,9],vectors_or_delta:2,veri:1,verifi:1,version:[1,7],via:[0,1,8],warmup:1,warranti:7,websit:13,welcom:0,well:1,when:[0,1,2,7,8],whether:2,which:[1,2,8,10],within:2,without:[2,7],won:[2,7],work:1,workload:1,workspac:7,world:1,would:8,write:7,write_by_cpu:7,write_kernel:7,write_with_accum_kernel:7,ws_size:7,www:7,you:[0,1,2,7],your:[0,1],zero:7,zhangyafei:1},titles:["Contributing","NVIDIA HierarchicalKV(Beta)","Template Class HashTable","File merlin_hashtable.cuh","HierarchicalKV C++ API Documentation","Namespace nv","Namespace nv::merlin","Program Listing for File merlin_hashtable.cuh","Struct EvictStrategy","Struct HashTableOptions","Typedef nv::merlin::EraseIfPredict","Complete HierarchicalKV API","Variable nv::merlin::thrust_par","Merlin Key-Value 
Storage"],titleterms:{"class":[2,3,4,6],about:[0,1],acknowledg:1,addit:0,api:[1,4,11],bazel:1,benchmark:1,benefit:1,beta:1,build:1,cmake:1,code:0,commun:0,complet:[4,11],configur:1,content:[3,5,6],contribut:0,contributor:1,cuh:[3,7],definit:3,document:[1,2,4,8,9,10,12],eraseifpredict:10,evict:1,evictstrategi:8,feedback:1,file:[3,4,7,11],hashtabl:2,hashtableopt:9,hbm:1,hierarchi:4,hierarchicalkv:[0,1,4,11],hmem:1,how:1,hybrid:1,idea:1,includ:3,kei:[1,13],licenc:0,licens:1,list:7,maintainership:0,matrix:1,matur:1,merlin:[6,10,12,13],merlin_hasht:[3,7],mode:1,namespac:[3,4,5,6],nvidia:1,option:1,perform:1,program:7,pure:1,relat:13,requir:0,resourc:13,restrict:1,storag:13,strategi:1,struct:[4,8,9],style:0,support:1,templat:2,thrust_par:12,typedef:[3,4,6,10],usag:1,use:1,valu:13,variabl:[3,4,6,12]}}) \ No newline at end of file +Search.setIndex({docnames:["CONTRIBUTING","README","api/classnv_1_1merlin_1_1HashTable","api/file_merlin_hashtable.cuh","api/index","api/namespace_nv","api/namespace_nv__merlin","api/program_listing_file_merlin_hashtable.cuh","api/structnv_1_1merlin_1_1EvictStrategy","api/structnv_1_1merlin_1_1HashTableOptions","api/typedef_merlin__hashtable_8cuh_1a5001706db6e977358e7f76ad6773703a","api/unabridged_orphan","api/variable_merlin__hashtable_8cuh_1a359fe56354918308560f46cb3136a3da","index"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":3,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":2,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.intersphinx":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["CONTRIBUTING.md","README.md","api/classnv_1_1merlin_1_1HashTable.rst","api/file_merlin_hashtable.cuh.rst","api/index.rst","api/namespace_nv.rst","api/namespace_nv__merlin.rst","api/program_listing_file_merlin_hashtable.cuh.rst","api/structnv_1_1merlin_1_1EvictStrategy.rst","api/structnv_1_1merlin_1_1HashTableOptions.rst","api/typedef
_merlin__hashtable_8cuh_1a5001706db6e977358e7f76ad6773703a.rst","api/unabridged_orphan.rst","api/variable_merlin__hashtable_8cuh_1a359fe56354918308560f46cb3136a3da.rst","index.rst"],objects:{"":{"nv::merlin::EraseIfPredict":[10,0,1,"_CPPv4I00EN2nv6merlin14EraseIfPredictE"],"nv::merlin::EraseIfPredict::K":[10,1,1,"_CPPv4I00EN2nv6merlin14EraseIfPredictE"],"nv::merlin::EraseIfPredict::S":[10,1,1,"_CPPv4I00EN2nv6merlin14EraseIfPredictE"],"nv::merlin::EvictStrategy":[8,2,1,"_CPPv4N2nv6merlin13EvictStrategyE"],"nv::merlin::EvictStrategy::EvictStrategyEnum":[8,3,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnumE"],"nv::merlin::EvictStrategy::EvictStrategyEnum::kCustomized":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum11kCustomizedE"],"nv::merlin::EvictStrategy::EvictStrategyEnum::kEpochLfu":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum9kEpochLfuE"],"nv::merlin::EvictStrategy::EvictStrategyEnum::kEpochLru":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum9kEpochLruE"],"nv::merlin::EvictStrategy::EvictStrategyEnum::kLfu":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum4kLfuE"],"nv::merlin::EvictStrategy::EvictStrategyEnum::kLru":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum4kLruE"],"nv::merlin::EvictStrategy::kCustomized":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum11kCustomizedE"],"nv::merlin::EvictStrategy::kEpochLfu":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum9kEpochLfuE"],"nv::merlin::EvictStrategy::kEpochLru":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum9kEpochLruE"],"nv::merlin::EvictStrategy::kLfu":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum4kLfuE"],"nv::merlin::EvictStrategy::kLru":[8,4,1,"_CPPv4N2nv6merlin13EvictStrategy17EvictStrategyEnum4kLruE"],"nv::merlin::EvictStrategy::set_global_epoch":[8,5,1,"_CPPv4N2nv6merlin13EvictStrategy16set_global_epochEK8uint64_t"],"nv::merlin::EvictStrategy::set_global_epoch::epoch":[8,6,1,"_CPPv4N2nv6merli
n13EvictStrategy16set_global_epochEK8uint64_t"],"nv::merlin::HashTable":[2,2,1,"_CPPv4I000_i0EN2nv6merlin9HashTableE"],"nv::merlin::HashTable::ArchTag":[2,1,1,"_CPPv4I000_i0EN2nv6merlin9HashTableE"],"nv::merlin::HashTable::HashTable":[2,5,1,"_CPPv4N2nv6merlin9HashTable9HashTableEv"],"nv::merlin::HashTable::K":[2,1,1,"_CPPv4I000_i0EN2nv6merlin9HashTableE"],"nv::merlin::HashTable::Pred":[2,0,1,"_CPPv4N2nv6merlin9HashTable4PredE"],"nv::merlin::HashTable::S":[2,1,1,"_CPPv4I000_i0EN2nv6merlin9HashTableE"],"nv::merlin::HashTable::Strategy":[2,1,1,"_CPPv4I000_i0EN2nv6merlin9HashTableE"],"nv::merlin::HashTable::V":[2,1,1,"_CPPv4I000_i0EN2nv6merlin9HashTableE"],"nv::merlin::HashTable::accum_or_assign":[2,5,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::accum_or_assigns":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::ignore_evict_strategy":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::n":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::scores":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::accum_or_assign::value_or_deltas":[2,6,1,"_CPPv4N2nv6merlin9HashTable15accum_or_assignEK9size_
typePK8key_typePK10value_typePKbPK10score_type12cudaStream_tb"],"nv::merlin::HashTable::allocator_type":[2,0,1,"_CPPv4N2nv6merlin9HashTable14allocator_typeE"],"nv::merlin::HashTable::assign":[2,5,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::assign::keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::assign::n":[2,6,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::assign::scores":[2,6,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::assign::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::assign::unique_key":[2,6,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::assign::values":[2,6,1,"_CPPv4N2nv6merlin9HashTable6assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tb"],"nv::merlin::HashTable::bucket_count":[2,5,1,"_CPPv4NK2nv6merlin9HashTable12bucket_countEv"],"nv::merlin::HashTable::capacity":[2,5,1,"_CPPv4NK2nv6merlin9HashTable8capacityEv"],"nv::merlin::HashTable::clear":[2,5,1,"_CPPv4N2nv6merlin9HashTable5clearE12cudaStream_t"],"nv::merlin::HashTable::clear::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable5clearE12cudaStream_t"],"nv::merlin::HashTable::contains":[2,5,1,"_CPPv4NK2nv6merlin9HashTable8containsEK9size_typePK8key_typePb12cudaStream_t"],"nv::merlin::HashTable::contains::founds":[2,6,1,"_CPPv4NK2nv6merlin9HashTable8containsEK9size_typePK8key_typePb12cudaStream_t"],"nv::merlin::HashTable::contains::keys":[2,6,1,"_CPPv4NK2nv6merlin9HashTable8containsEK9size_typePK8key_typePb12cudaStream_t"],"nv::merlin::HashTable::contains::n":[2,6,1,"_CPPv4NK
2nv6merlin9HashTable8containsEK9size_typePK8key_typePb12cudaStream_t"],"nv::merlin::HashTable::contains::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable8containsEK9size_typePK8key_typePb12cudaStream_t"],"nv::merlin::HashTable::dim":[2,5,1,"_CPPv4NK2nv6merlin9HashTable3dimEv"],"nv::merlin::HashTable::empty":[2,5,1,"_CPPv4NK2nv6merlin9HashTable5emptyE12cudaStream_t"],"nv::merlin::HashTable::empty::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable5emptyE12cudaStream_t"],"nv::merlin::HashTable::erase":[2,5,1,"_CPPv4N2nv6merlin9HashTable5eraseEK9size_typePK8key_type12cudaStream_t"],"nv::merlin::HashTable::erase::keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable5eraseEK9size_typePK8key_type12cudaStream_t"],"nv::merlin::HashTable::erase::n":[2,6,1,"_CPPv4N2nv6merlin9HashTable5eraseEK9size_typePK8key_type12cudaStream_t"],"nv::merlin::HashTable::erase::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable5eraseEK9size_typePK8key_type12cudaStream_t"],"nv::merlin::HashTable::erase_if":[2,5,1,"_CPPv4II00E0EN2nv6merlin9HashTable8erase_ifE9size_typeRK8key_typeRK10score_type12cudaStream_t"],"nv::merlin::HashTable::erase_if::PredFunctor":[2,1,1,"_CPPv4II00E0EN2nv6merlin9HashTable8erase_ifE9size_typeRK8key_typeRK10score_type12cudaStream_t"],"nv::merlin::HashTable::erase_if::pattern":[2,6,1,"_CPPv4II00E0EN2nv6merlin9HashTable8erase_ifE9size_typeRK8key_typeRK10score_type12cudaStream_t"],"nv::merlin::HashTable::erase_if::stream":[2,6,1,"_CPPv4II00E0EN2nv6merlin9HashTable8erase_ifE9size_typeRK8key_typeRK10score_type12cudaStream_t"],"nv::merlin::HashTable::erase_if::threshold":[2,6,1,"_CPPv4II00E0EN2nv6merlin9HashTable8erase_ifE9size_typeRK8key_typeRK10score_type12cudaStream_t"],"nv::merlin::HashTable::evict_strategy":[2,7,1,"_CPPv4N2nv6merlin9HashTable14evict_strategyE"],"nv::merlin::HashTable::export_batch":[2,5,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch::d_counter":[2,6,1,"_CPPv4NK2nv6merlin9HashT
able12export_batchE9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch::keys":[2,6,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch::n":[2,6,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch::offset":[2,6,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch::scores":[2,6,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch::values":[2,6,1,"_CPPv4NK2nv6merlin9HashTable12export_batchEK9size_typeK9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if":[2,5,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::PredFunctor":[2,1,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::d_counter":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::keys":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cuda
Stream_t"],"nv::merlin::HashTable::export_batch_if::n":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::offset":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::pattern":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::scores":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::stream":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::threshold":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::export_batch_if::values":[2,6,1,"_CPPv4II00E0ENK2nv6merlin9HashTable15export_batch_ifEvRK8key_typeRK10score_type9size_typeK9size_typeP9size_typeP8key_typeP10value_typeP10score_type12cudaStream_t"],"nv::merlin::HashTable::find":[2,5,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find::founds":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find::keys":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::m
erlin::HashTable::find::n":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find::scores":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find::unique_key":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find::values":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4findEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tb"],"nv::merlin::HashTable::find_or_insert":[2,5,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tbb"],"nv::merlin::HashTable::find_or_insert::founds":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tbb"],"nv::merlin::HashTable::find_or_insert::ignore_evict_strategy":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tbb"],"nv::merlin::HashTable::find_or_insert::keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tbb"],"nv::merlin::HashTable::find_or_insert::n":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tbb"],"nv::merlin::HashTable::find_or_insert::scores":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tbb"],"nv::merlin::HashTable::find_or_insert::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tbb"],"nv::merlin::HashTable::find_or_insert::unique_key":[2,6,1,"_CPPv4N2nv6m
erlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tbb"],"nv::merlin::HashTable::find_or_insert::values":[2,6,1,"_CPPv4N2nv6merlin9HashTable14find_or_insertEK9size_typePK8key_typePP10value_typePbP10score_type12cudaStream_tbb"],"nv::merlin::HashTable::init":[2,5,1,"_CPPv4N2nv6merlin9HashTable4initERK16HashTableOptionsP14allocator_type"],"nv::merlin::HashTable::init::allocator":[2,6,1,"_CPPv4N2nv6merlin9HashTable4initERK16HashTableOptionsP14allocator_type"],"nv::merlin::HashTable::init::options":[2,6,1,"_CPPv4N2nv6merlin9HashTable4initERK16HashTableOptionsP14allocator_type"],"nv::merlin::HashTable::insert_and_evict":[2,5,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::d_evicted_counter":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::evicted_keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::evicted_scores":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::evicted_values":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::ignore_evict_strategy":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::Ha
shTable::insert_and_evict::keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::n":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::scores":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::unique_key":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_and_evict::values":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_and_evictEK9size_typePK8key_typePK10value_typePK10score_typeP8key_typeP10value_typeP10score_typeP9size_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_or_assign":[2,5,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_or_assign::ignore_evict_strategy":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_or_assign::keys":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_or_assign::n":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tbb"],"nv::m
erlin::HashTable::insert_or_assign::scores":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_or_assign::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_or_assign::unique_key":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tbb"],"nv::merlin::HashTable::insert_or_assign::values":[2,6,1,"_CPPv4N2nv6merlin9HashTable16insert_or_assignEK9size_typePK8key_typePK10value_typePK10score_type12cudaStream_tbb"],"nv::merlin::HashTable::key_type":[2,0,1,"_CPPv4N2nv6merlin9HashTable8key_typeE"],"nv::merlin::HashTable::load":[2,5,1,"_CPPv4N2nv6merlin9HashTable4loadEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::load::file":[2,6,1,"_CPPv4N2nv6merlin9HashTable4loadEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::load::max_workspace_size":[2,6,1,"_CPPv4N2nv6merlin9HashTable4loadEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::load::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable4loadEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::load_factor":[2,5,1,"_CPPv4NK2nv6merlin9HashTable11load_factorE12cudaStream_t"],"nv::merlin::HashTable::load_factor::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable11load_factorE12cudaStream_t"],"nv::merlin::HashTable::max_bucket_size":[2,5,1,"_CPPv4NK2nv6merlin9HashTable15max_bucket_sizeEv"],"nv::merlin::HashTable::reserve":[2,5,1,"_CPPv4N2nv6merlin9HashTable7reserveEK9size_type12cudaStream_t"],"nv::merlin::HashTable::reserve::new_capacity":[2,6,1,"_CPPv4N2nv6merlin9HashTable7reserveEK9size_type12cudaStream_t"],"nv::merlin::HashTable::reserve::stream":[2,6,1,"_CPPv4N2nv6merlin9HashTable7reserveEK9size_type12cudaStream_t"],"nv::merlin::HashTable::save":[2,5,1,"_CPPv4NK2nv6merlin9HashTable4saveEP1
0BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::save::file":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4saveEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::save::max_workspace_size":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4saveEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::save::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4saveEP10BaseKVFileI1K1V1SEK6size_t12cudaStream_t"],"nv::merlin::HashTable::score_type":[2,0,1,"_CPPv4N2nv6merlin9HashTable10score_typeE"],"nv::merlin::HashTable::set_max_capacity":[2,5,1,"_CPPv4N2nv6merlin9HashTable16set_max_capacityE9size_type"],"nv::merlin::HashTable::set_max_capacity::new_max_capacity":[2,6,1,"_CPPv4N2nv6merlin9HashTable16set_max_capacityE9size_type"],"nv::merlin::HashTable::size":[2,5,1,"_CPPv4NK2nv6merlin9HashTable4sizeE12cudaStream_t"],"nv::merlin::HashTable::size::stream":[2,6,1,"_CPPv4NK2nv6merlin9HashTable4sizeE12cudaStream_t"],"nv::merlin::HashTable::size_type":[2,0,1,"_CPPv4N2nv6merlin9HashTable9size_typeE"],"nv::merlin::HashTable::value_type":[2,0,1,"_CPPv4N2nv6merlin9HashTable10value_typeE"],"nv::merlin::HashTable::~HashTable":[2,5,1,"_CPPv4N2nv6merlin9HashTableD0Ev"],"nv::merlin::HashTableOptions":[9,2,1,"_CPPv4N2nv6merlin16HashTableOptionsE"],"nv::merlin::HashTableOptions::block_size":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions10block_sizeE"],"nv::merlin::HashTableOptions::device_id":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions9device_idE"],"nv::merlin::HashTableOptions::device_memory_pool":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions18device_memory_poolE"],"nv::merlin::HashTableOptions::dim":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions3dimE"],"nv::merlin::HashTableOptions::host_memory_pool":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions16host_memory_poolE"],"nv::merlin::HashTableOptions::init_capacity":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions13init_capacityE"],"nv::merlin::HashTableOptions::io_block_size":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions13io_block_size
E"],"nv::merlin::HashTableOptions::io_by_cpu":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions9io_by_cpuE"],"nv::merlin::HashTableOptions::max_bucket_size":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions15max_bucket_sizeE"],"nv::merlin::HashTableOptions::max_capacity":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions12max_capacityE"],"nv::merlin::HashTableOptions::max_hbm_for_vectors":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions19max_hbm_for_vectorsE"],"nv::merlin::HashTableOptions::max_load_factor":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions15max_load_factorE"],"nv::merlin::HashTableOptions::use_constant_memory":[9,7,1,"_CPPv4N2nv6merlin16HashTableOptions19use_constant_memoryE"],"nv::merlin::thrust_par":[12,7,1,"_CPPv4N2nv6merlin10thrust_parE"]}},objnames:{"0":["cpp","type","C++ type"],"1":["cpp","templateParam","C++ template parameter"],"2":["cpp","class","C++ class"],"3":["cpp","enum","C++ enum"],"4":["cpp","enumerator","C++ enumerator"],"5":["cpp","function","C++ function"],"6":["cpp","functionParam","C++ function parameter"],"7":["cpp","member","C++ 
member"]},objtypes:{"0":"cpp:type","1":"cpp:templateParam","2":"cpp:class","3":"cpp:enum","4":"cpp:enumerator","5":"cpp:function","6":"cpp:functionParam","7":"cpp:member"},terms:{"013":1,"021":1,"026":1,"033":1,"045":1,"047":1,"049":1,"052":1,"069":1,"070":1,"072":1,"073":1,"075":1,"083":1,"086":1,"087":1,"093":1,"094":1,"095":1,"096":1,"0x1":2,"0xffff000000000000":10,"0xffffffff":[1,8],"0xfffffffffffffffc":1,"0xfffffffffffffffd":1,"0xfffffffffffffffe":1,"0xffffffffffffffff":1,"100":1,"101600":7,"1024":[1,2,7,9],"1024ul":7,"1048576":1,"109":1,"111":1,"113":1,"116":1,"121":1,"122":1,"123":1,"124":1,"126":1,"128":[1,7,9],"128u":7,"129":1,"131":1,"177":1,"181":1,"198":1,"199":1,"2022":7,"2023":1,"221":1,"225":1,"238":1,"239":1,"240":1,"242":1,"258":1,"272":1,"274":1,"278":1,"287":1,"288":1,"292":1,"293":1,"294":1,"296":1,"309":1,"316":1,"318":1,"321":1,"32bit":[1,8],"335":1,"359":1,"369":1,"370":1,"374":1,"393":1,"394":1,"395":1,"435":1,"447":1,"452":1,"469":1,"470":1,"478":1,"481":1,"484":1,"493":1,"499":1,"502":1,"511":1,"512":1,"541":1,"545":1,"549":1,"551":1,"569":1,"572":1,"607":1,"608":1,"612":1,"613":1,"617":1,"618":1,"619":1,"621":1,"646":1,"655":1,"657":1,"692":1,"693":1,"705":1,"706":1,"718":1,"726":1,"765":1,"770":1,"771":1,"773":1,"783":1,"784":1,"801":1,"805":1,"807":1,"80gb":1,"815":1,"834":1,"854":1,"865":1,"866":1,"873":1,"930":1,"931":1,"950":1,"951":1,"961":1,"965":1,"982":1,"\u03bb":1,"boolean":2,"byte":[1,9],"char":[1,7],"class":[1,7,10,11],"const":[2,7,8,10],"default":[1,2,9],"enum":[7,8],"export":[1,2,10],"float":[1,2,7,9],"function":[1,2,8,10],"import":[1,2,8],"int":[1,2,7,9],"new":[1,2,7],"public":[0,1,2,7,8,9],"return":[1,2,7,10],"static":[2,7,8,12],"super":1,"throw":[2,7],"true":[2,7,10],"void":[2,7,8],"while":[2,7,8],For:[1,8,13],The:[0,1,2,7,8,9,10],Use:1,Used:8,__device__:[2,10],__forceinline__:[2,10],a100:1,about:13,abov:0,access:[2,7],accommod:2,accum:2,accum_or_assign:[1,2,7],accum_or_assign_kernel:7,accumul:2,actual:2,add:1,added:2,addr
ess:[1,2],adher:0,advanc:1,advertis:1,after:[1,8],agre:7,algorithm:2,all:[0,1,2,10],allevi:1,alloc:[2,3,7],allocator_:7,allocator_typ:[2,7],allow:7,alreadi:2,also:[0,1,2],alwai:7,amount:[2,7],ani:[0,1,2,7,8],anoth:2,answer:1,anymor:7,anywai:2,apach:[0,1,7],api:[0,2,8,10],appli:2,applic:7,appropri:2,archtag:[2,7],argc:1,argument:2,argv:1,array_kernel:[3,7],assign:[1,2,7,8],assum:2,atom:[3,7],atomic_bool:7,attempt:7,attribut:2,auto:[7,12],automat:[2,8],avail:13,averag:2,avoid:7,bandwidth:[0,1,13],base:[1,7],basealloc:[2,7],basekvfil:[2,7],bash:1,basi:7,basic:[1,2],batch:[2,7],bazel_build:1,bazelrc:1,becaus:2,been:[1,7],befor:[0,1,2,9],below:1,benchmark_util:1,better:1,billion:1,binari:1,bit:8,block:[1,9],block_siz:[1,7,9],blockquot:8,bool:[1,2,7,9,10],bring:13,bucket:[1,2,7,8,9],bucket_count:[2,7],bucket_max_s:7,bucket_s:7,buckets_num:7,buckets_s:7,bug:[0,1],bypass:1,cach:7,call:[2,7],callabl:7,caller:[1,2,8],can:[0,1,2],capabl:[0,1],capac:[1,2,7,9],certain:[1,2],challeng:1,chang:[0,2,7],check:2,check_evict_strategi:7,chunk:2,clear:[2,7],clear_kernel:7,clock:[1,8],clone:1,close:[1,8],cluster:1,code:1,com:[0,1,13],command:1,comment:1,common:1,commun:1,compact:2,compil:1,complianc:7,comput:1,concept:8,concurr:2,condit:[1,2,7],conduct:[0,2],configur:[2,9],conform:2,confrom:2,consid:2,consider:2,consist:2,constant:[1,8],constexpr:[2,7,12],construct:7,constructor:2,consumpt:1,contain:[1,2,7],containskernelparam:7,containsparam:7,content:7,contribut:1,control:1,copi:7,copyright:7,core_kernel:[3,7],corpor:7,correspond:2,could:[1,8],count:7,counter:7,cpu:[1,9],creat:[1,7],create_t:7,criteria:0,cstdint:[3,7],cuda:[1,2,7,9,12],cuda_check:7,cudacheckerror:7,cudadeviceprop:7,cudadevicesynchron:7,cudaexcept:2,cudagetdevic:7,cudagetdeviceproperti:7,cudamemcpi:7,cudamemcpyasync:7,cudamemcpydefault:7,cudamemcpydevicetohost:7,cudamemcpyhosttodevic:7,cudamemsetasync:7,cudasetdevic:7,cudastream_t:[2,7],cudastreamsynchron:7,cuh:[1,2,4,8,9,10,11,12],current:[2,7],custom:[1,2,7,8],customiz
:[1,10],d_count:7,d_counter:[2,7],d_dst:7,d_dst_ptr:7,d_evicted_count:[2,7],d_kei:7,d_mask:7,d_offset:7,d_score:7,d_src_offset:7,d_src_offset_ptr:7,d_table_:7,d_table_value_addr:7,d_valu:7,data:[2,7],dcmake_build_typ:1,default_allocator_:7,defaultalloc:7,defer_lock:7,defin:[1,2,8,9,10,12],definit:[1,11],delet:7,delta:[1,2,7],denot:2,dep:7,descript:1,design:[0,1],destroi:2,destroy_t:7,detail:1,dev_mem_pool_:7,dev_w:7,dev_ws_siz:7,develop:13,devic:[1,2,7,8,9],device_clock:[1,8],device_id:[1,7,9],device_memory_pool:[7,9],device_ptr:7,device_vector:[3,7],devicealloc:7,devicememorypool:7,deviceprop:7,differ:[1,8],difficult:1,dim:[1,2,7,9],dimens:[1,2,9],directli:[1,2],discuss:0,distribut:7,divid:2,dlrm:1,doc:1,docker:1,document:[0,7,13],doe:2,doesn:2,don:1,doubl:2,double_capac:7,dsm:1,dst:7,dst_offset:7,dst_offset_ptr:7,dst_ptr:7,dump:[2,7],dump_kernel:7,dump_kernel_shared_memory_s:7,each:[0,1,2,8,9],effici:1,either:7,element:2,els:7,embed:[0,1],empti:[2,7],empty_kei:7,encount:1,end:[0,1],end_i:7,endif:7,engin:1,enough:7,enumer:8,environ:1,epoch:[1,7,8],epochlfu:1,epochlru:1,equal:[1,2,7,8],equival:2,eras:[2,7,10],erase_if:[2,7,10],eraseifpredfunctor:[2,10],eraseifpredict:[2,3,4,6,7],error:2,evalu:1,evcted_scor:2,even:2,evict:[2,7,8],evict_strategi:[2,7],evicted_kei:[2,7],evicted_scor:[2,7],evicted_valu:[2,7],evictstrategi:[1,2,3,4,6,7],evictstrategyenum:[7,8],evictstrategyparam:7,exampl:[2,10],exceed:2,except:[2,7],exclus:7,execut:2,execute_kernel:7,execution_polici:[3,7],exist:2,expect:2,export_batch:[1,2,7],export_batch_if:[1,2,7,10],exportifpredfunctor:[2,10],express:7,extern:1,face:1,factor:[1,2,9],fals:[1,2,7,9],fast_load_factor:7,featur:[0,1],file:[1,2,8,9,10,12],filesystem:2,filter_condit:7,find:[1,2,7],find_or_insert:[1,2,7],find_or_insert_kernel:7,find_or_insert_kernel_lock_key_hybrid:7,find_or_insert_ptr_kernel_lock_kei:7,find_or_insert_ptr_kernel_unlock_kei:7,first:[2,7,8],firstli:1,fix:[0,1],flag:[1,9],flexible_buff:[3,7],float32:1,follow:[0,1,2],format:2,fo
und:[1,2,7],fourth:2,free:[2,7],frequenc:[1,2,8],from:[1,2,8,10],full:[1,2,8],fulli:[1,8],functor:2,futur:2,gcc:1,gener:[0,1],get:[1,7],get_workspac:7,git:1,github:[0,1,13],global:[1,8],global_epoch:[1,7,8],govern:7,gpu:[0,1,2,7,13],gpu_boolean_mask:7,grab:7,granular:[1,8],grate:[0,1],greater:2,grid_siz:7,group_lock:[3,7],group_shared_mutex:7,growth:1,guarante:2,guid:0,h_dst:7,h_evicted_count:7,h_found:7,h_kei:7,h_param_key_index:7,h_param_valu:7,h_score:7,h_size:7,h_src_offset:7,h_table_value_addr:7,h_valu:7,handl:[1,9],happen:1,has:[1,2],hash:[1,2,9,10],hashtabl:[1,3,4,6,7],hashtableopt:[1,2,3,4,6,7],have:[1,7],hbm:[0,2,7,9,13],header:1,help:1,hierarch:[0,1,2,13],hierarchicalkv:[2,7,9,13],high:[0,1,2,8,13],higher:2,highli:1,hkvtabl:1,hold:7,host:[0,1,2,7,8,9,13],host_mem_pool_:7,host_memory_pool:[7,9],host_w:7,host_ws_siz:7,hostalloc:7,hostmemorypool:7,hot:1,how:0,http:[0,1,7,13],ignor:2,ignore_evict_strategi:[2,7],ignored_global_epoch:7,imag:1,implement:1,impli:7,improv:1,includ:[0,1,7,11],increas:8,increment:[1,8],indic:[1,2,9,10],industri:1,inform:13,init:[1,2,7],init_capac:[1,7,9],initi:[1,2,9],initialized_:7,inlin:[2,7,8],input:[1,8],insert:[1,2,8],insert_and_evict:[1,2,7],insert_or_assign:[1,2,7],insert_unique_lock:7,int64_t:[1,7],integr:0,interest:0,intern:[1,2,8],introduc:[1,2,8],introduct:13,invalid:7,invalid_argu:7,io_block_s:[1,7,9],io_by_cpu:[1,7,9],is_fast_mod:7,is_pow:7,is_pure_hbm:7,is_sam:7,ispow2:7,issu:[0,1],item:[2,10],its:2,june:1,just:2,kcustom:[7,8],keep:[1,2,8],kei:[0,2,7,8,10],kepochlfu:[7,8],kepochlru:[7,8],kernel:[1,7,9],kernel_select_interval_:7,kernelparam:7,kernelselector_findorinsert:7,kernelselector_upd:7,kernelselector_updatescor:7,kernelselector_upsert:7,kernelselector_upsertandevict:7,key_typ:[1,2,7],keys_not_empti:7,keys_ptr:7,kind:7,klfu:[7,8],klru:[1,2,7,8],kvm_size:7,languag:7,larg:[1,2,7],larger:[1,2,7,8],later:1,law:7,lead:2,learn:1,least:[1,2,8],length:[1,2,9],less:[1,2,7,8],lfu:[1,7,8],librari:[0,1,13],licens:[0,7],lifan:1
,like:[1,2,8],limit:[1,3,7],line:7,list:[3,11],load:[1,2,7,9],load_factor:[2,7],local:1,lock:7,lookup:7,lookup_kernel:7,lookupkernelparam:7,lookupparam:7,low:[1,8],lru:[1,2,7,8],machin:1,mai:[0,7],main:1,maintain:1,maintian:[0,1],make:[1,7],make_uniqu:[1,7],manag:1,match:[1,2],max:[1,7,8,9],max_bucket_s:[1,2,7,9],max_capac:[1,2,7,9],max_hbm_for_vector:[1,7,9],max_load_factor:[1,7,9],max_value_s:7,max_workspace_s:[2,7],maximum:[1,2,9],mean:1,meet:[0,1],member:9,memori:[0,1,2,3,7,9,13],memory_pool:[3,7],memorypool:7,memorypoolopt:[7,9],memorytyp:7,merlin:[0,1,2,3,4,5,7,8,9],merlin_check:7,merlin_hasht:[1,2,4,8,9,10,11,12],merlin_hashtable_benchmark:1,merlin_hashtable_test:1,method:2,might:2,million:1,min:7,minbucketcapacityfilt:7,minimum:[1,2,8],miss:[1,2],mkdir:1,mode:[7,8],model:1,modifi:[1,2],more:[1,2,8,13],most:1,move:1,must:[1,2,7],mutabl:7,mutex:[3,7],mutex_:7,mutipl:7,n_offset:7,name:1,namespac:[7,11],nanosecond:[1,8],necessari:[0,1],need:[0,1,2,7],need_lock:7,new_capac:[2,7],new_max_capac:[2,7],next:7,noexcept:[2,7],none:7,note:1,noth:2,notic:2,now:2,nullptr:[2,7,8],number:[1,2],numeric_limit:7,nvcr:1,nvidia:[0,7,13],object:2,obtain:7,occur:[2,8],occurr:2,offset:[2,7],old:2,onc:7,one:[1,7],onli:[1,2,7],open:[0,1,13],oper:[1,2,7,10],option:[2,7,9],options_:[2,7],order:1,org:7,origin:1,other:2,otherwis:2,our:13,out:0,output:2,overarch:13,overwrit:1,overwritten:2,pair:2,par:[7,12],par_nosync:7,param:[2,7],param_key_index:7,param_key_index_ptr:7,paramet:[1,2,8],part:[0,1],pattern:[2,7,10],pcie:1,pend:7,per:[1,2],perform:2,permiss:7,pipelin:7,pipeline_max_s:7,pleas:[1,7],plu:7,pointer:1,polici:8,pool:[7,9],posit:2,possibl:7,pow:7,power:[2,7],pragma:7,pred:[2,7],predfunctor:[2,7],predic:2,privat:7,process:[2,7],product:[0,1],program:[3,11],project:[0,13],propos:0,provid:[0,1,8,13],pull:[0,1],pure:7,put:2,py3:1,question:1,rais:2,ram:13,reach:[1,8],reach_max_capacity_:7,read:[1,7],read_kernel:7,read_or_write_by_cpu:7,read_or_write_kernel:7,read_or_write_kernel_unlock
_kei:7,read_shared_lock:7,real:1,recent:8,recommend:[0,1,2],recsi:[0,1],recurs:1,reduc:[1,2,7],refer:[0,1],regular:1,rehash:[1,2,9],rehash_kernel_for_fast_mod:7,reinterpret_cast:7,releas:[1,2],remov:[2,10],remove_kernel:7,replac:2,repositori:13,request:[0,2,7],requir:[1,2,7],reserv:[1,2,7,9],reset:7,resolut:1,resourc:[1,2],respons:2,restraint:1,result:2,review:[0,1],run:[1,7],runtime_error:7,safe:2,safe_get_block_s:7,safe_get_grid_s:7,same:[1,2],satisfi:2,save:[2,7],scale:1,scenario:1,score:[1,2,7,8,10],score_s:7,score_typ:[1,2,7],scoredata:7,search:[1,2],second:1,see:[7,13],select_kernel:7,selectaccumorassignkernelwithio:7,selectcontainskernel:7,selectfindorinsertkernelwithio:7,selectfindorinsertptrkernel:7,selectlookupkernelwithio:7,selectlookupptrkernel:7,selector:7,selectpipelinecontainskernel:7,selectpipelinelookupkernelwithio:7,selectupdatekernelwithio:7,selectupdatescorekernel:7,selectupsertandevictkernelwithio:7,selectupsertkernelwithio:7,send:0,separ:1,seper:2,serv:1,set:[1,2,7,8],set_global_epoch:[7,8],set_max_capac:[2,7],sever:1,shape:2,share:2,shared_mem_size_:7,shared_mutex:[3,7],shared_s:7,sharedmemperblock:7,should:[1,2,7,8,10],signatur:2,signific:0,simplifi:1,singl:[1,7],situat:2,size:[1,2,7,9],size_ptr:7,size_t:[1,2,7,9],size_typ:[2,7],sizeof:7,slightli:[1,8],slot:2,sm80:[2,7],small:1,smaller:7,softwar:7,some:0,someth:1,sophist:1,sort:[3,7],sort_by_kei:7,sourc:[1,13],special:2,specif:[1,7],specifi:[1,2,7],src:7,src_offset:7,src_offset_ptr:7,src_ptr:7,ssd:2,standard:1,start_i:7,static_assert:7,static_cast:7,statu:[1,2],std:[1,7],step:7,step_count:7,storag:[0,1,2,7],store:[0,1],strategi:[2,7,8],stream:[2,7],struct:[1,2,3,6,7,10],submiss:0,submit:1,successfulli:2,suitabl:1,support:[2,7],sure:7,sync_table_configur:7,system:1,tabl:[1,2,7,9,10],table_:7,table_value_ptr:7,tablecor:7,tableopt:1,target:2,team:[0,1],templat:[3,4,6,7,10],temporari:[2,7],tensorflow:1,terabyt:1,test:[0,1],tf2:1,than:[2,7],thei:[1,2,8],them:1,thi:[0,2,7,10],third:2,thread:[1,2],t
hread_loc:7,threshold:[2,7,10],through:7,throughput:1,thrust:[3,7,12],thrust_par:[3,4,6,7],thrust_vers:7,tie:7,tile:7,tile_s:7,time:1,timestamp:[2,8],tlp_lookup_kernel_hybrid:7,tlp_lookup_ptr_kernel_with_filt:7,tlp_update_kernel_hybrid:7,todo:7,togeth:13,too:[1,2,7],total:2,total_count:7,total_s:7,train:1,travers:10,treat:2,tupl:[1,2,7],tuple_s:7,type:[1,2,3,7,8],type_trait:[3,7],typedef:11,typenam:[2,7],uint32_t:7,uint64_t:[1,2,7,8],uintptr_t:7,under:7,uniqu:2,unique_kei:[2,7],unique_ptr:[1,7],unit:[0,1],unless:[2,7],unsaf:[1,2],unsign:7,updat:[1,2],update_kernel:7,update_read_lock:7,update_shared_lock:7,upsert_kernel:7,upsert_kernel_lock_key_hybrid:7,use:[0,2,7],use_constant_memori:[7,9],used:2,user:[0,1,2],using:[1,2,7,10,13],util:[1,3,7],valid:1,valu:[0,1,2,7,8],value_or_delta:[2,7],value_s:7,value_typ:[2,7],variabl:[1,11],vecd_load:7,vector:[1,2,9],vectors_or_delta:2,veri:1,verifi:1,version:[1,7],via:[0,1,8],warmup:1,warranti:7,websit:13,welcom:0,well:1,when:[0,1,2,7,8],whether:2,which:[1,2,8,10],within:2,without:[2,7],won:[2,7],work:1,workload:1,workspac:7,world:1,would:8,write:7,write_by_cpu:7,write_kernel:7,write_kernel_unlock_kei:7,write_with_accum_kernel:7,ws_size:7,www:7,you:[0,1,2,7],your:[0,1],zero:7,zhangyafei:1},titles:["Contributing","NVIDIA HierarchicalKV(Beta)","Template Class HashTable","File merlin_hashtable.cuh","HierarchicalKV C++ API Documentation","Namespace nv","Namespace nv::merlin","Program Listing for File merlin_hashtable.cuh","Struct EvictStrategy","Struct HashTableOptions","Typedef nv::merlin::EraseIfPredict","Complete HierarchicalKV API","Variable nv::merlin::thrust_par","Merlin Key-Value 
Storage"],titleterms:{"class":[2,3,4,6],about:[0,1],acknowledg:1,addit:0,api:[1,4,11],bazel:1,benchmark:1,benefit:1,beta:1,build:1,cmake:1,code:0,commun:0,complet:[4,11],configur:1,content:[3,5,6],contribut:0,contributor:1,cuh:[3,7],definit:3,document:[1,2,4,8,9,10,12],eraseifpredict:10,evict:1,evictstrategi:8,feedback:1,file:[3,4,7,11],hashtabl:2,hashtableopt:9,hbm:1,hierarchi:4,hierarchicalkv:[0,1,4,11],hmem:1,how:1,hybrid:1,idea:1,includ:3,kei:[1,13],licenc:0,licens:1,list:7,maintainership:0,matrix:1,matur:1,merlin:[6,10,12,13],merlin_hasht:[3,7],mode:1,namespac:[3,4,5,6],nvidia:1,option:1,perform:1,program:7,pure:1,relat:13,requir:0,resourc:13,restrict:1,storag:13,strategi:1,struct:[4,8,9],style:0,support:1,templat:2,thrust_par:12,typedef:[3,4,6,10],usag:1,use:1,valu:13,variabl:[3,4,6,12]}}) \ No newline at end of file