From f0e2205890ae27137d448c39dfaf3701ac28eb5a Mon Sep 17 00:00:00 2001 From: Karthik Velayutham Date: Fri, 6 Mar 2020 15:41:33 -0600 Subject: [PATCH 01/30] Initial pmdk pushes for clht_lb_res --- .vscode/settings.json | 7 ++ P-CLHT/CMakeLists.txt | 10 ++- P-CLHT/Makefile | 2 +- P-CLHT/example.cpp | 11 +++ P-CLHT/include/clht_lb_res.h | 8 ++- P-CLHT/src/clht_gc.c | 21 +++--- P-CLHT/src/clht_lb_res.c | 136 ++++++++++++++++++++++++++--------- 7 files changed, 147 insertions(+), 48 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..1a5c7902 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "files.associations": { + "mutex": "c", + "shared_mutex": "c", + "condition_variable": "c" + } +} \ No newline at end of file diff --git a/P-CLHT/CMakeLists.txt b/P-CLHT/CMakeLists.txt index 1697d1cd..ef380494 100644 --- a/P-CLHT/CMakeLists.txt +++ b/P-CLHT/CMakeLists.txt @@ -38,9 +38,17 @@ find_library(JemallocLib jemalloc) find_library(TbbLib tbb) find_package (Threads) +if(PKG_CONFIG_FOUND) + pkg_check_modules(LIBPMEMOBJ++ REQUIRED libpmemobj++) +else() + find_package(LIBPMEMOBJ++ REQUIRED) +endif() + +link_directories(${LIBPMEMOBJ++_LIBRARY_DIRS}) + set(P_CLHT_TEST example.cpp src/clht_lb_res.c src/clht_gc.c external/sspfd/sspfd.c external/ssmem/src/ssmem.c) add_executable(example ${P_CLHT_TEST}) -target_link_libraries(example ${TbbLib} ${JemallocLib} boost_system +target_link_libraries(example ${TbbLib} ${JemallocLib} ${LIBPMEMOBJ++_LIBRARIES} boost_system boost_thread pthread) diff --git a/P-CLHT/Makefile b/P-CLHT/Makefile index daa01641..49fc7352 100644 --- a/P-CLHT/Makefile +++ b/P-CLHT/Makefile @@ -151,7 +151,7 @@ CFLAGS += $(PLATFORM) CFLAGS += $(OPTIMIZE) CFLAGS += $(DEBUG_FLAGS) -INCLUDES := -I$(MAININCLUDE) -I$(TOP)/external/include +INCLUDES := -I$(MAININCLUDE) -I$(TOP)/external/include OBJ_FILES := clht_gc.o SRC := src diff --git a/P-CLHT/example.cpp b/P-CLHT/example.cpp index e76b5bb6..43ee43b4 100644 --- a/P-CLHT/example.cpp +++ b/P-CLHT/example.cpp @@ -6,8 +6,19 @@ #include #include "tbb/tbb.h" +#include +#include +#include +#include + using namespace std; +using pmem::obj::delete_persistent; +using pmem::obj::make_persistent; +using pmem::obj::p; +using pmem::obj::persistent_ptr; +using pmem::obj::pool; + #include "clht.h" #include "ssmem.h" diff --git a/P-CLHT/include/clht_lb_res.h b/P-CLHT/include/clht_lb_res.h index 94a1108d..66b75576 100644 --- a/P-CLHT/include/clht_lb_res.h +++ b/P-CLHT/include/clht_lb_res.h @@ -36,6 +36,7 @@ #include #include "atomic_ops.h" #include "utils.h" +#include #ifdef __cplusplus extern "C" { @@ -173,8 +174,10 @@ typedef struct ALIGNED(CACHE_LINE_SIZE) clht { struct { - struct clht_hashtable_s* ht; + PMEMoid ht; + // struct clht_hashtable_s* ht; uint8_t next_cache_line[CACHE_LINE_SIZE - (sizeof(void*))]; + // Prob need to add TOID to this as well struct clht_hashtable_s* ht_oldest; struct ht_ts* version_list; size_t version_min; @@ -193,7 +196,8 @@ typedef struct ALIGNED(CACHE_LINE_SIZE) clht_hashtable_s struct { size_t num_buckets; - bucket_t* table; + PMEMoid table; + //bucket_t* table; size_t hash; size_t version; uint8_t next_cache_line[CACHE_LINE_SIZE - (3 * sizeof(size_t)) - (sizeof(void*))]; diff --git a/P-CLHT/src/clht_gc.c b/P-CLHT/src/clht_gc.c index 797dbfe8..d58feea9 100644 --- a/P-CLHT/src/clht_gc.c +++ b/P-CLHT/src/clht_gc.c @@ -47,7 +47,8 @@ clht_gc_thread_init(clht_t* h, int id) ht_ts_t* ts = (ht_ts_t*) memalign(CACHE_LINE_SIZE, 
sizeof(ht_ts_t)); assert(ts != NULL); - ts->version = h->ht->version; + clht_hashtable_t* ht_ptr = pmemobj_direct(h->ht); + ts->version = ht_ptr->version; ts->id = id; do @@ -128,7 +129,8 @@ clht_gc_min_version_used(clht_t* h) { volatile ht_ts_t* cur = h->version_list; - size_t min = h->ht->version; + clht_hashtable_t* ht_ptr = pmemobj_direct(h->ht); + size_t min = ht_ptr->version; while (cur != NULL) { if (cur->version < min) @@ -149,8 +151,9 @@ clht_gc_min_version_used(clht_t* h) static int clht_gc_collect_cond(clht_t* hashtable, int collect_not_referenced_only) { + clht_hashtable_t* ht_ptr = pmemobj_direct(hashtable->ht); /* if version_min >= current version there is nothing to collect! */ - if ((hashtable->version_min >= hashtable->ht->version) || TRYLOCK_ACQ(&hashtable->gc_lock)) + if ((hashtable->version_min >= ht_ptr->version) || TRYLOCK_ACQ(&hashtable->gc_lock)) { /* printf("** someone else is performing gc\n"); */ return 0; @@ -160,7 +163,7 @@ clht_gc_collect_cond(clht_t* hashtable, int collect_not_referenced_only) /* printf("[GCOLLE-%02d] LOCK : %zu\n", GET_ID(collect_not_referenced_only), hashtable->version); */ - size_t version_min = hashtable->ht->version; + size_t version_min = ht_ptr->version; if (collect_not_referenced_only) { version_min = clht_gc_min_version_used(hashtable); @@ -221,7 +224,7 @@ clht_gc_free(clht_hashtable_t* hashtable) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = hashtable->table + bin; + bucket = pmemobj_direct(hashtable->table) + bin; bucket = bucket->next; while (bucket != NULL) { @@ -232,7 +235,7 @@ clht_gc_free(clht_hashtable_t* hashtable) } #endif - free(hashtable->table); + free(pmemobj_direct(hashtable->table)); free(hashtable); return 1; @@ -246,7 +249,7 @@ clht_gc_destroy(clht_t* hashtable) { #if !defined(CLHT_LINKED) clht_gc_collect_all(hashtable); - clht_gc_free(hashtable->ht); + clht_gc_free(pmemobj_direct(hashtable->ht)); free(hashtable); #endif @@ -270,7 +273,7 @@ clht_gc_release(clht_hashtable_t* hashtable) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = hashtable->table + bin; + bucket = pmemobj_direct(hashtable->table) + bin; bucket = bucket->next; while (bucket != NULL) { @@ -281,7 +284,7 @@ clht_gc_release(clht_hashtable_t* hashtable) } #endif - ssmem_release(clht_alloc, hashtable->table); + ssmem_release(clht_alloc, pmemobj_direct(hashtable->table)); ssmem_release(clht_alloc, hashtable); return 1; } diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 9f86c3c2..2f67bd9a 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -36,8 +36,27 @@ #include #include +#include + #include "clht_lb_res.h" +// TOID_DECLARE(clht_t, 1); +// TOID_DECLARE(clht_hashtable_t, 2); +// TOID_DECLARE(bucket_t, 3); +// TOID_DECLARE(struct clht_hashtable_s, 4); + +// Initialize the persistent memory pool +POBJ_LAYOUT_BEGIN(clht); +POBJ_LAYOUT_ROOT(clht, struct root); +POBJ_LAYOUT_TOID(clht, clht_t); +POBJ_LAYOUT_TOID(clht, clht_hashtable_t); +POBJ_LAYOUT_TOID(clht, bucket_t); +POBJ_LAYOUT_END(clht); + +struct root { + struct clht_t *ht; +}; + //#define CLHTDEBUG //#define CRASH_AFTER_SWAP_CLHT //#define CRASH_BEFORE_SWAP_CLHT @@ -72,6 +91,9 @@ __thread size_t check_ht_status_steps = CLHT_STATUS_INVOK_IN; #endif */ +/* Global pool pointer */ +PMEMobjpool* pop; + const char* clht_type_desc() { @@ -175,7 +197,13 @@ static inline void clflush_next_check(char *data, int len, bool fence) clht_bucket_create() { bucket_t* bucket = NULL; - bucket = (bucket_t *) memalign(CACHE_LINE_SIZE, 
sizeof(bucket_t)); + PMEMoid bucket_oid; + if (pmemobj_alloc(pop, &bucket_oid, sizeof(bucket_t), 0, 0, 0)) { + fprintf(stderr, "pmemobj_alloc for clht_bucket_create\n"); + assert(0); + } + // bucket = (bucket_t *) memalign(CACHE_LINE_SIZE, sizeof(bucket_t)); + bucket = pmemobj_direct(bucket_oid); if (bucket == NULL) { return NULL; @@ -206,20 +234,39 @@ clht_bucket_create_stats(clht_hashtable_t* h, int* resize) return b; } + clht_hashtable_t* clht_hashtable_create(uint64_t num_buckets); clht_t* clht_create(uint64_t num_buckets) { - clht_t* w = (clht_t*) memalign(CACHE_LINE_SIZE, sizeof(clht_t)); + // Create a PMEM pool, open some file + pop = pmemobj_create("some file", POBJ_LAYOUT_NAME(clht), PMEMOBJ_MIN_POOL, 0666); + if (pop == NULL) + printf("failed to open the pool\n"); + + // Create the root pointer + PMEMoid my_root = pmemobj_root(pop, sizeof(clht_t)); + clht_t* root_ptr = pmemobj_direct(my_root); + + // Allocate the table in persistent memory + if (pmemobj_alloc(pop, &root_ptr->ht, sizeof(clht_t), 0, 0, 0)) { + fprintf(stderr, "pmemobj_alloc for clht_create\n"); + assert(0); + } + + clht_t* w = root_ptr; + if (w == NULL) { - printf("** malloc @ hatshtalbe\n"); + printf("** malloc @ hashtable\n"); return NULL; } - w->ht = clht_hashtable_create(num_buckets); - if (w->ht == NULL) + struct clht_hashtable_s* ht_ptr = pmemobj_direct(w->ht); + ht_ptr = clht_hashtable_create(num_buckets); + + if (ht_ptr == NULL) { free(w); return NULL; @@ -229,12 +276,14 @@ clht_create(uint64_t num_buckets) w->status_lock = LOCK_FREE; w->version_list = NULL; w->version_min = 0; - w->ht_oldest = w->ht; + w->ht_oldest = ht_ptr; - clflush((char *)w->ht->table, num_buckets * sizeof(bucket_t), true); - clflush((char *)w->ht, sizeof(clht_hashtable_t), true); + // Make sure to change the flushing to the correct offset given the pointer + clflush((char *)pmemobj_direct(ht_ptr->table), num_buckets * sizeof(bucket_t), true); + clflush((char *)ht_ptr, sizeof(clht_hashtable_t), true); clflush((char *)w, sizeof(clht_t), true); + pmemobj_close(pop); return w; } @@ -249,7 +298,15 @@ clht_hashtable_create(uint64_t num_buckets) } /* Allocate the table itself. 
*/ - hashtable = (clht_hashtable_t*) memalign(CACHE_LINE_SIZE, sizeof(clht_hashtable_t)); + // hashtable = (clht_hashtable_t*) memalign(CACHE_LINE_SIZE, sizeof(clht_hashtable_t)); + // Allocate the table in persistent memory + PMEMoid ht_oid; + if (pmemobj_alloc(pop, &ht_oid, sizeof(clht_hashtable_t), 0, 0, 0)) { + fprintf(stderr, "pmemobj_alloc for clht_hashtable_create\n"); + assert(0); + } + hashtable = pmemobj_direct(ht_oid); + if (hashtable == NULL) { printf("** malloc @ hatshtalbe\n"); @@ -257,24 +314,32 @@ clht_hashtable_create(uint64_t num_buckets) } /* hashtable->table = calloc(num_buckets, (sizeof(bucket_t))); */ - hashtable->table = (bucket_t*) memalign(CACHE_LINE_SIZE, num_buckets * (sizeof(bucket_t))); - if (hashtable->table == NULL) + // hashtable->table = (bucket_t*) memalign(CACHE_LINE_SIZE, num_buckets * (sizeof(bucket_t))); + PMEMoid table_oid; + if (pmemobj_alloc(pop, &table_oid, num_buckets * sizeof(bucket_t), 0, 0, 0)) { + fprintf(stderr, "pmemobj_alloc for table_oid in clht_hashtable_create\n"); + assert(0); + } + hashtable->table = table_oid; + bucket_t* bucket_ptr = pmemobj_direct(hashtable->table); + + if (bucket_ptr == NULL) { printf("** alloc: hashtable->table\n"); fflush(stdout); free(hashtable); return NULL; } - memset(hashtable->table, 0, num_buckets * (sizeof(bucket_t))); + memset(bucket_ptr, 0, num_buckets * (sizeof(bucket_t))); uint64_t i; for (i = 0; i < num_buckets; i++) { - hashtable->table[i].lock = LOCK_FREE; + bucket_ptr[i].lock = LOCK_FREE; uint32_t j; for (j = 0; j < ENTRIES_PER_BUCKET; j++) { - hashtable->table[i].key[j] = 0; + bucket_ptr[i].key[j] = 0; } } @@ -316,7 +381,7 @@ clht_get(clht_hashtable_t* hashtable, clht_addr_t key) { size_t bin = clht_hash(hashtable, key); CLHT_GC_HT_VERSION_USED(hashtable); - volatile bucket_t* bucket = hashtable->table + bin; + volatile bucket_t* bucket = pmemobj_direct(hashtable->table) + bin; uint32_t j; do @@ -369,9 +434,9 @@ bucket_exists(volatile bucket_t* bucket, clht_addr_t key) int clht_put(clht_t* h, clht_addr_t key, clht_val_t val) { - clht_hashtable_t* hashtable = h->ht; + clht_hashtable_t* hashtable = pmemobj_direct(h->ht); size_t bin = clht_hash(hashtable, key); - volatile bucket_t* bucket = hashtable->table + bin; + volatile bucket_t* bucket = pmemobj_direct(hashtable->table) + bin; #if CLHT_READ_ONLY_FAIL == 1 if (bucket_exists(bucket, key)) { @@ -382,10 +447,10 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) clht_lock_t* lock = &bucket->lock; while (!LOCK_ACQ(lock, hashtable)) { - hashtable = h->ht; + hashtable = pmemobj_direct(h->ht); size_t bin = clht_hash(hashtable, key); - bucket = hashtable->table + bin; + bucket = pmemobj_direct(hashtable->table) + bin; lock = &bucket->lock; } @@ -467,9 +532,9 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) clht_val_t clht_remove(clht_t* h, clht_addr_t key) { - clht_hashtable_t* hashtable = h->ht; + clht_hashtable_t* hashtable = pmemobj_direct(h->ht); size_t bin = clht_hash(hashtable, key); - volatile bucket_t* bucket = hashtable->table + bin; + volatile bucket_t* bucket = pmemobj_direct(hashtable->table) + bin; #if CLHT_READ_ONLY_FAIL == 1 if (!bucket_exists(bucket, key)) @@ -481,10 +546,10 @@ clht_remove(clht_t* h, clht_addr_t key) clht_lock_t* lock = &bucket->lock; while (!LOCK_ACQ(lock, hashtable)) { - hashtable = h->ht; + hashtable = pmemobj_direct(h->ht); size_t bin = clht_hash(hashtable, key); - bucket = hashtable->table + bin; + bucket = pmemobj_direct(hashtable->table) + bin; lock = &bucket->lock; } @@ -515,7 +580,7 @@ 
clht_remove(clht_t* h, clht_addr_t key) static uint32_t clht_put_seq(clht_hashtable_t* hashtable, clht_addr_t key, clht_val_t val, uint64_t bin) { - volatile bucket_t* bucket = hashtable->table + bin; + volatile bucket_t* bucket = pmemobj_direct(hashtable->table) + bin; uint32_t j; do @@ -614,7 +679,7 @@ ht_resize_help(clht_hashtable_t* h) /* hash = num_buckets - 1 */ for (b = h->hash; b >= 0; b--) { - bucket_t* bu_cur = h->table + b; + bucket_t* bu_cur = pmemobj_direct(h->table) + b; if (!bucket_cpy((clht_t *)h, bu_cur, h->table_tmp)) { /* reached a point where the resizer is handling */ /* printf("[GC-%02d] helped #buckets: %10zu = %5.1f%%\n", */ @@ -635,7 +700,7 @@ ht_resize_pes(clht_t* h, int is_increase, int by) check_ht_status_steps = CLHT_STATUS_INVOK; - clht_hashtable_t* ht_old = h->ht; + clht_hashtable_t* ht_old = pmemobj_direct(h->ht); if (TRYLOCK_ACQ(&h->resize_lock)) { @@ -667,7 +732,7 @@ ht_resize_pes(clht_t* h, int is_increase, int by) size_t b; for (b = 0; b < ht_old->num_buckets; b++) { - bucket_t* bu_cur = ht_old->table + b; + bucket_t* bu_cur = pmemobj_direct(ht_old->table) + b; int ret = bucket_cpy(h, bu_cur, ht_new); /* reached a point where the helper is handling */ if (ret == -1) return -1; @@ -715,7 +780,7 @@ ht_resize_pes(clht_t* h, int is_increase, int by) mfence(); clflush((char *)ht_new, sizeof(clht_hashtable_t), false); - clflush_next_check((char *)ht_new->table, num_buckets_new * sizeof(bucket_t), false); + clflush_next_check((char *)pmemobj_direct(ht_new->table), num_buckets_new * sizeof(bucket_t), false); mfence(); #if defined(CRASH_BEFORE_SWAP_CLHT) @@ -823,7 +888,7 @@ clht_size(clht_hashtable_t* hashtable) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = hashtable->table + bin; + bucket = pmemobj_direct(hashtable->table) + bin; uint32_t j; do @@ -853,7 +918,7 @@ ht_status(clht_t* h, int resize_increase, int just_print) return 0; } - clht_hashtable_t* hashtable = h->ht; + clht_hashtable_t* hashtable = pmemobj_direct(h->ht); uint64_t num_buckets = hashtable->num_buckets; volatile bucket_t* bucket = NULL; size_t size = 0; @@ -863,7 +928,7 @@ ht_status(clht_t* h, int resize_increase, int just_print) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = hashtable->table + bin; + bucket = pmemobj_direct(hashtable->table) + bin; int expands_cont = -1; expands--; @@ -979,7 +1044,7 @@ clht_print(clht_hashtable_t* hashtable) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = hashtable->table + bin; + bucket = pmemobj_direct(hashtable->table) + bin; printf("[[%05zu]] ", bin); @@ -1006,7 +1071,7 @@ clht_print(clht_hashtable_t* hashtable) void clht_lock_initialization(clht_t *h) { DEBUG_PRINT("Performing Lock initialization\n"); - clht_hashtable_t *ht = h->ht; + clht_hashtable_t *ht = pmemobj_direct(h->ht); volatile bucket_t *next; h->resize_lock = LOCK_FREE; @@ -1015,8 +1080,9 @@ void clht_lock_initialization(clht_t *h) int i; for (i = 0; i < ht->num_buckets; i++) { - ht->table[i].lock = LOCK_FREE; - for (next = ht->table[i].next; next != NULL; next = next->next) { + bucket_t* temp = pmemobj_direct(ht->table); + temp[i].lock = LOCK_FREE; + for (next = temp[i].next; next != NULL; next = next->next) { next->lock = LOCK_FREE; } } From 8aaf5c28b46cf4019caf0d270e95cca2fc11906c Mon Sep 17 00:00:00 2001 From: Karthik Velayutham Date: Fri, 13 Mar 2020 16:21:31 -0500 Subject: [PATCH 02/30] Refactored & bug --- .vscode/settings.json | 3 +- P-CLHT/example.cpp | 3 +- P-CLHT/include/clht_lb_res.h | 8 ++++++ P-CLHT/src/clht_gc.c | 2 ++ 
P-CLHT/src/clht_lb_res.c | 53 ++++++++++++++++-------------------- 5 files changed, 38 insertions(+), 31 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 1a5c7902..2849a3c4 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,6 +2,7 @@ "files.associations": { "mutex": "c", "shared_mutex": "c", - "condition_variable": "c" + "condition_variable": "c", + "chrono": "cpp" } } \ No newline at end of file diff --git a/P-CLHT/example.cpp b/P-CLHT/example.cpp index 43ee43b4..14cb46cd 100644 --- a/P-CLHT/example.cpp +++ b/P-CLHT/example.cpp @@ -71,7 +71,7 @@ void run(char **argv) { printf("operation,n,ops/s\n"); clht_t *hashtable = clht_create(512); - + printf("hashtable: %p\n", hashtable); barrier_init(&barrier, num_thread); thread_data_t *tds = (thread_data_t *) malloc(num_thread * sizeof(thread_data_t)); @@ -80,6 +80,7 @@ void run(char **argv) { { // Load + printf("the hashtable in thread: %p\n", hashtable); auto starttime = std::chrono::system_clock::now(); next_thread_id.store(0); auto func = [&]() { diff --git a/P-CLHT/include/clht_lb_res.h b/P-CLHT/include/clht_lb_res.h index 66b75576..01101cef 100644 --- a/P-CLHT/include/clht_lb_res.h +++ b/P-CLHT/include/clht_lb_res.h @@ -445,6 +445,14 @@ int ht_resize_pes(clht_t* hashtable, int is_increase, int by); const char* clht_type_desc(); void clht_lock_initialization(clht_t *h); + +// Initialize the persistent memory pool +POBJ_LAYOUT_BEGIN(clht); +POBJ_LAYOUT_ROOT(clht, clht_t); +POBJ_LAYOUT_TOID(clht, clht_hashtable_t); +POBJ_LAYOUT_TOID(clht, bucket_t); +POBJ_LAYOUT_END(clht); + #ifdef __cplusplus } #endif diff --git a/P-CLHT/src/clht_gc.c b/P-CLHT/src/clht_gc.c index d58feea9..0e70a782 100644 --- a/P-CLHT/src/clht_gc.c +++ b/P-CLHT/src/clht_gc.c @@ -40,6 +40,7 @@ static __thread ht_ts_t* clht_ts_thread = NULL; void clht_gc_thread_init(clht_t* h, int id) { + printf("undertale\n"); clht_alloc = (ssmem_allocator_t*) malloc(sizeof(ssmem_allocator_t)); assert(clht_alloc != NULL); ssmem_alloc_init_fs_size(clht_alloc, SSMEM_DEFAULT_MEM_SIZE, SSMEM_GC_FREE_SET_SIZE, id); @@ -48,6 +49,7 @@ clht_gc_thread_init(clht_t* h, int id) assert(ts != NULL); clht_hashtable_t* ht_ptr = pmemobj_direct(h->ht); + printf("ht_ptr: %p\n", h->ht); ts->version = ht_ptr->version; ts->id = id; diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 2f67bd9a..4059b082 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -40,18 +40,8 @@ #include "clht_lb_res.h" -// TOID_DECLARE(clht_t, 1); -// TOID_DECLARE(clht_hashtable_t, 2); -// TOID_DECLARE(bucket_t, 3); -// TOID_DECLARE(struct clht_hashtable_s, 4); - -// Initialize the persistent memory pool -POBJ_LAYOUT_BEGIN(clht); -POBJ_LAYOUT_ROOT(clht, struct root); -POBJ_LAYOUT_TOID(clht, clht_t); -POBJ_LAYOUT_TOID(clht, clht_hashtable_t); -POBJ_LAYOUT_TOID(clht, bucket_t); -POBJ_LAYOUT_END(clht); +/* Global pool pointer */ +PMEMobjpool* pop; struct root { struct clht_t *ht; @@ -91,8 +81,6 @@ __thread size_t check_ht_status_steps = CLHT_STATUS_INVOK_IN; #endif */ -/* Global pool pointer */ -PMEMobjpool* pop; const char* clht_type_desc() @@ -198,7 +186,8 @@ clht_bucket_create() { bucket_t* bucket = NULL; PMEMoid bucket_oid; - if (pmemobj_alloc(pop, &bucket_oid, sizeof(bucket_t), 0, 0, 0)) { + if (pmemobj_alloc(pop, &bucket_oid, sizeof(bucket_t), 0, 0, 0)) + { fprintf(stderr, "pmemobj_alloc for clht_bucket_create\n"); assert(0); } @@ -240,22 +229,28 @@ clht_hashtable_t* clht_hashtable_create(uint64_t num_buckets); clht_t* clht_create(uint64_t num_buckets) { 
- // Create a PMEM pool, open some file - pop = pmemobj_create("some file", POBJ_LAYOUT_NAME(clht), PMEMOBJ_MIN_POOL, 0666); + // Open the PMEMpool if it exists, otherwise create it. + if( access("/mnt/pmem/pool", F_OK ) != -1 ) + { + pop = pmemobj_open("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht)); + } else + { + pop = pmemobj_create("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht), PMEMOBJ_MIN_POOL, 0666); + } + if (pop == NULL) - printf("failed to open the pool\n"); - + { + perror("failed to open the pool\n"); + } + // Create the root pointer PMEMoid my_root = pmemobj_root(pop, sizeof(clht_t)); - clht_t* root_ptr = pmemobj_direct(my_root); - - // Allocate the table in persistent memory - if (pmemobj_alloc(pop, &root_ptr->ht, sizeof(clht_t), 0, 0, 0)) { - fprintf(stderr, "pmemobj_alloc for clht_create\n"); - assert(0); - } + if (pmemobj_direct(my_root) == NULL) + { + perror("root pointer is null\n"); + } - clht_t* w = root_ptr; + clht_t* w = pmemobj_direct(my_root); if (w == NULL) { @@ -263,8 +258,8 @@ clht_create(uint64_t num_buckets) return NULL; } - struct clht_hashtable_s* ht_ptr = pmemobj_direct(w->ht); - ht_ptr = clht_hashtable_create(num_buckets); + struct clht_hashtable_s* ht_ptr = clht_hashtable_create(num_buckets); + w->ht = pmemobj_oid(ht_ptr); if (ht_ptr == NULL) { From 922b912e9f23377cf63417ab2d950f646e42b887 Mon Sep 17 00:00:00 2001 From: Karthik Velayutham Date: Fri, 13 Mar 2020 21:19:36 -0500 Subject: [PATCH 03/30] Fixed seg fault --- P-CLHT/example.cpp | 32 ++++++++++++-------------------- P-CLHT/src/clht_gc.c | 3 +-- P-CLHT/src/clht_lb_res.c | 7 +++++-- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/P-CLHT/example.cpp b/P-CLHT/example.cpp index 14cb46cd..0bb3eec4 100644 --- a/P-CLHT/example.cpp +++ b/P-CLHT/example.cpp @@ -6,20 +6,9 @@ #include #include "tbb/tbb.h" -#include -#include -#include -#include +#include -using namespace std; - -using pmem::obj::delete_persistent; -using pmem::obj::make_persistent; -using pmem::obj::p; -using pmem::obj::persistent_ptr; -using pmem::obj::pool; - -#include "clht.h" +#include "clht_lb_res.h" #include "ssmem.h" typedef struct thread_data { @@ -72,6 +61,7 @@ void run(char **argv) { clht_t *hashtable = clht_create(512); printf("hashtable: %p\n", hashtable); + barrier_init(&barrier, num_thread); thread_data_t *tds = (thread_data_t *) malloc(num_thread * sizeof(thread_data_t)); @@ -128,13 +118,15 @@ void run(char **argv) { clht_gc_thread_init(tds[thread_id].ht, tds[thread_id].id); barrier_cross(&barrier); - for (uint64_t i = start_key; i < end_key; i++) { - uintptr_t val = clht_get(tds[thread_id].ht->ht, keys[i]); - if (val != keys[i]) { - std::cout << "[CLHT] wrong key read: " << val << "expected: " << keys[i] << std::endl; - exit(1); - } - } + // for (uint64_t i = start_key; i < end_key; i++) { + // clht_t *r = tds[thread_id].ht; + // struct clht_hashtable_s* ht = pmemobj_direct(r->ht); + // uintptr_t val = clht_get(ht, keys[i]); + // if (val != keys[i]) { + // std::cout << "[CLHT] wrong key read: " << val << "expected: " << keys[i] << std::endl; + // exit(1); + // } + // } }; std::vector thread_group; diff --git a/P-CLHT/src/clht_gc.c b/P-CLHT/src/clht_gc.c index 0e70a782..6497ebe2 100644 --- a/P-CLHT/src/clht_gc.c +++ b/P-CLHT/src/clht_gc.c @@ -40,7 +40,6 @@ static __thread ht_ts_t* clht_ts_thread = NULL; void clht_gc_thread_init(clht_t* h, int id) { - printf("undertale\n"); clht_alloc = (ssmem_allocator_t*) malloc(sizeof(ssmem_allocator_t)); assert(clht_alloc != NULL); ssmem_alloc_init_fs_size(clht_alloc, 
SSMEM_DEFAULT_MEM_SIZE, SSMEM_GC_FREE_SET_SIZE, id); @@ -239,7 +238,7 @@ clht_gc_free(clht_hashtable_t* hashtable) free(pmemobj_direct(hashtable->table)); free(hashtable); - + return 1; } diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 4059b082..5ccbcf35 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -258,9 +258,12 @@ clht_create(uint64_t num_buckets) return NULL; } - struct clht_hashtable_s* ht_ptr = clht_hashtable_create(num_buckets); + clht_hashtable_t* ht_ptr = clht_hashtable_create(num_buckets); w->ht = pmemobj_oid(ht_ptr); + printf("orig ht: %p\n", ht_ptr); + printf("w->ht: %p\n", pmemobj_direct(w->ht)); + if (ht_ptr == NULL) { free(w); @@ -278,7 +281,7 @@ clht_create(uint64_t num_buckets) clflush((char *)ht_ptr, sizeof(clht_hashtable_t), true); clflush((char *)w, sizeof(clht_t), true); - pmemobj_close(pop); + // pmemobj_close(pop); return w; } From 1d5656412952342f981bcc86e92f0366275e01ad Mon Sep 17 00:00:00 2001 From: Karthik Velayutham Date: Sun, 15 Mar 2020 18:52:42 -0500 Subject: [PATCH 04/30] Main logic working with PMDK Still some bugs to be hashed out: - Seg fault thrown when clht_gc_destroy is called - PM file unable to handle larger sizes for keys - Need to implement PM logic for buckets --- P-CLHT/example.cpp | 22 ++++++++++------------ P-CLHT/src/clht_gc.c | 18 ++++++++++-------- P-CLHT/src/clht_lb_res.c | 28 +++++++++++++--------------- 3 files changed, 33 insertions(+), 35 deletions(-) diff --git a/P-CLHT/example.cpp b/P-CLHT/example.cpp index 0bb3eec4..e3f17d6c 100644 --- a/P-CLHT/example.cpp +++ b/P-CLHT/example.cpp @@ -60,7 +60,6 @@ void run(char **argv) { printf("operation,n,ops/s\n"); clht_t *hashtable = clht_create(512); - printf("hashtable: %p\n", hashtable); barrier_init(&barrier, num_thread); @@ -70,7 +69,6 @@ void run(char **argv) { { // Load - printf("the hashtable in thread: %p\n", hashtable); auto starttime = std::chrono::system_clock::now(); next_thread_id.store(0); auto func = [&]() { @@ -118,15 +116,15 @@ void run(char **argv) { clht_gc_thread_init(tds[thread_id].ht, tds[thread_id].id); barrier_cross(&barrier); - // for (uint64_t i = start_key; i < end_key; i++) { - // clht_t *r = tds[thread_id].ht; - // struct clht_hashtable_s* ht = pmemobj_direct(r->ht); - // uintptr_t val = clht_get(ht, keys[i]); - // if (val != keys[i]) { - // std::cout << "[CLHT] wrong key read: " << val << "expected: " << keys[i] << std::endl; - // exit(1); - // } - // } + for (uint64_t i = start_key; i < end_key; i++) { + PMEMoid pmem_ht = (PMEMoid)((tds[thread_id].ht)->ht); + clht_hashtable_t *ht = (clht_hashtable_t*)pmemobj_direct(pmem_ht); + uintptr_t val = clht_get(ht, keys[i]); + if (val != keys[i]) { + std::cout << "[CLHT] wrong key read: " << val << "expected: " << keys[i] << std::endl; + exit(1); + } + } }; std::vector thread_group; @@ -140,7 +138,7 @@ void run(char **argv) { std::chrono::system_clock::now() - starttime); printf("Throughput: run, %f ,ops/us\n", (n * 1.0) / duration.count()); } - clht_gc_destroy(hashtable); + // clht_gc_destroy(hashtable); delete[] keys; } diff --git a/P-CLHT/src/clht_gc.c b/P-CLHT/src/clht_gc.c index 6497ebe2..cf7aefc0 100644 --- a/P-CLHT/src/clht_gc.c +++ b/P-CLHT/src/clht_gc.c @@ -48,7 +48,7 @@ clht_gc_thread_init(clht_t* h, int id) assert(ts != NULL); clht_hashtable_t* ht_ptr = pmemobj_direct(h->ht); - printf("ht_ptr: %p\n", h->ht); + ts->version = ht_ptr->version; ts->id = id; @@ -225,14 +225,16 @@ clht_gc_free(clht_hashtable_t* hashtable) uint64_t bin; for (bin = 0; bin < num_buckets; 
bin++) { - bucket = pmemobj_direct(hashtable->table) + bin; + bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; bucket = bucket->next; + // printf("before the inner while\n"); while (bucket != NULL) - { - volatile bucket_t* cur = bucket; - bucket = bucket->next; - free((void*) cur); - } + { + volatile bucket_t* cur = bucket; + bucket = bucket->next; + // printf("CUR: %p\n", cur); + free((void*) cur); + } } #endif @@ -274,7 +276,7 @@ clht_gc_release(clht_hashtable_t* hashtable) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = pmemobj_direct(hashtable->table) + bin; + bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; bucket = bucket->next; while (bucket != NULL) { diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 5ccbcf35..c90b6fe4 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -188,7 +188,7 @@ clht_bucket_create() PMEMoid bucket_oid; if (pmemobj_alloc(pop, &bucket_oid, sizeof(bucket_t), 0, 0, 0)) { - fprintf(stderr, "pmemobj_alloc for clht_bucket_create\n"); + fprintf(stderr, "pmemobj_alloc failed for clht_bucket_create\n"); assert(0); } // bucket = (bucket_t *) memalign(CACHE_LINE_SIZE, sizeof(bucket_t)); @@ -261,9 +261,6 @@ clht_create(uint64_t num_buckets) clht_hashtable_t* ht_ptr = clht_hashtable_create(num_buckets); w->ht = pmemobj_oid(ht_ptr); - printf("orig ht: %p\n", ht_ptr); - printf("w->ht: %p\n", pmemobj_direct(w->ht)); - if (ht_ptr == NULL) { free(w); @@ -300,7 +297,7 @@ clht_hashtable_create(uint64_t num_buckets) // Allocate the table in persistent memory PMEMoid ht_oid; if (pmemobj_alloc(pop, &ht_oid, sizeof(clht_hashtable_t), 0, 0, 0)) { - fprintf(stderr, "pmemobj_alloc for clht_hashtable_create\n"); + fprintf(stderr, "pmemobj_alloc failed for clht_hashtable_create\n"); assert(0); } hashtable = pmemobj_direct(ht_oid); @@ -315,7 +312,7 @@ clht_hashtable_create(uint64_t num_buckets) // hashtable->table = (bucket_t*) memalign(CACHE_LINE_SIZE, num_buckets * (sizeof(bucket_t))); PMEMoid table_oid; if (pmemobj_alloc(pop, &table_oid, num_buckets * sizeof(bucket_t), 0, 0, 0)) { - fprintf(stderr, "pmemobj_alloc for table_oid in clht_hashtable_create\n"); + fprintf(stderr, "pmemobj_alloc failed for table_oid in clht_hashtable_create\n"); assert(0); } hashtable->table = table_oid; @@ -379,7 +376,7 @@ clht_get(clht_hashtable_t* hashtable, clht_addr_t key) { size_t bin = clht_hash(hashtable, key); CLHT_GC_HT_VERSION_USED(hashtable); - volatile bucket_t* bucket = pmemobj_direct(hashtable->table) + bin; + volatile bucket_t* bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; uint32_t j; do @@ -434,7 +431,8 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) { clht_hashtable_t* hashtable = pmemobj_direct(h->ht); size_t bin = clht_hash(hashtable, key); - volatile bucket_t* bucket = pmemobj_direct(hashtable->table) + bin; + volatile bucket_t* bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + #if CLHT_READ_ONLY_FAIL == 1 if (bucket_exists(bucket, key)) { @@ -448,7 +446,7 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) hashtable = pmemobj_direct(h->ht); size_t bin = clht_hash(hashtable, key); - bucket = pmemobj_direct(hashtable->table) + bin; + bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; lock = &bucket->lock; } @@ -532,7 +530,7 @@ clht_remove(clht_t* h, clht_addr_t key) { clht_hashtable_t* hashtable = pmemobj_direct(h->ht); size_t bin = clht_hash(hashtable, key); - volatile bucket_t* bucket = pmemobj_direct(hashtable->table) + bin; + volatile bucket_t* 
bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; #if CLHT_READ_ONLY_FAIL == 1 if (!bucket_exists(bucket, key)) @@ -547,7 +545,7 @@ clht_remove(clht_t* h, clht_addr_t key) hashtable = pmemobj_direct(h->ht); size_t bin = clht_hash(hashtable, key); - bucket = pmemobj_direct(hashtable->table) + bin; + bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; lock = &bucket->lock; } @@ -578,7 +576,7 @@ clht_remove(clht_t* h, clht_addr_t key) static uint32_t clht_put_seq(clht_hashtable_t* hashtable, clht_addr_t key, clht_val_t val, uint64_t bin) { - volatile bucket_t* bucket = pmemobj_direct(hashtable->table) + bin; + volatile bucket_t* bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; uint32_t j; do @@ -886,7 +884,7 @@ clht_size(clht_hashtable_t* hashtable) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = pmemobj_direct(hashtable->table) + bin; + bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; uint32_t j; do @@ -926,7 +924,7 @@ ht_status(clht_t* h, int resize_increase, int just_print) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = pmemobj_direct(hashtable->table) + bin; + bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; int expands_cont = -1; expands--; @@ -1042,7 +1040,7 @@ clht_print(clht_hashtable_t* hashtable) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = pmemobj_direct(hashtable->table) + bin; + bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; printf("[[%05zu]] ", bin); From 1982610b28e1e160cc3305bf1085f7c24f5a78d7 Mon Sep 17 00:00:00 2001 From: Karthik Velayutham Date: Sun, 15 Mar 2020 20:20:02 -0500 Subject: [PATCH 05/30] Implemented PM clean-up, solved segfault Some bugs: - For larger sizes of n, still having pmemobj_alloc errors --- P-CLHT/example.cpp | 2 +- P-CLHT/src/clht_gc.c | 8 +++++--- P-CLHT/src/clht_lb_res.c | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/P-CLHT/example.cpp b/P-CLHT/example.cpp index e3f17d6c..a6a7806a 100644 --- a/P-CLHT/example.cpp +++ b/P-CLHT/example.cpp @@ -138,7 +138,7 @@ void run(char **argv) { std::chrono::system_clock::now() - starttime); printf("Throughput: run, %f ,ops/us\n", (n * 1.0) / duration.count()); } - // clht_gc_destroy(hashtable); + clht_gc_destroy(hashtable); delete[] keys; } diff --git a/P-CLHT/src/clht_gc.c b/P-CLHT/src/clht_gc.c index cf7aefc0..ccac29ad 100644 --- a/P-CLHT/src/clht_gc.c +++ b/P-CLHT/src/clht_gc.c @@ -233,13 +233,15 @@ clht_gc_free(clht_hashtable_t* hashtable) volatile bucket_t* cur = bucket; bucket = bucket->next; // printf("CUR: %p\n", cur); - free((void*) cur); + PMEMoid cur_oid = pmemobj_oid(cur); + pmemobj_free(&cur_oid); } } #endif - free(pmemobj_direct(hashtable->table)); - free(hashtable); + pmemobj_free(&(hashtable->table)); + PMEMoid ht_oid = pmemobj_oid(hashtable); + pmemobj_free(&ht_oid); return 1; } diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index c90b6fe4..93336c28 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -259,6 +259,7 @@ clht_create(uint64_t num_buckets) } clht_hashtable_t* ht_ptr = clht_hashtable_create(num_buckets); + w->ht = pmemobj_oid(ht_ptr); if (ht_ptr == NULL) @@ -675,7 +676,7 @@ ht_resize_help(clht_hashtable_t* h) /* hash = num_buckets - 1 */ for (b = h->hash; b >= 0; b--) { - bucket_t* bu_cur = pmemobj_direct(h->table) + b; + bucket_t* bu_cur = ((bucket_t*)pmemobj_direct(h->table)) + b; if (!bucket_cpy((clht_t *)h, bu_cur, h->table_tmp)) { /* reached a point where the resizer is handling */ /* 
printf("[GC-%02d] helped #buckets: %10zu = %5.1f%%\n", */ @@ -728,7 +729,7 @@ ht_resize_pes(clht_t* h, int is_increase, int by) size_t b; for (b = 0; b < ht_old->num_buckets; b++) { - bucket_t* bu_cur = pmemobj_direct(ht_old->table) + b; + bucket_t* bu_cur = (bucket_t*)(pmemobj_direct(ht_old->table)) + b; int ret = bucket_cpy(h, bu_cur, ht_new); /* reached a point where the helper is handling */ if (ret == -1) return -1; From 298bf59b03759405b182790c4021efdadc73c4f0 Mon Sep 17 00:00:00 2001 From: pyrito Date: Mon, 16 Mar 2020 19:59:52 -0500 Subject: [PATCH 06/30] Modified clean-up in clht_gc.c, found resizing bug --- P-CLHT/src/clht_gc.c | 16 +++++++++++----- P-CLHT/src/clht_lb_res.c | 4 ++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/P-CLHT/src/clht_gc.c b/P-CLHT/src/clht_gc.c index ccac29ad..567c2dd4 100644 --- a/P-CLHT/src/clht_gc.c +++ b/P-CLHT/src/clht_gc.c @@ -233,14 +233,14 @@ clht_gc_free(clht_hashtable_t* hashtable) volatile bucket_t* cur = bucket; bucket = bucket->next; // printf("CUR: %p\n", cur); - PMEMoid cur_oid = pmemobj_oid(cur); + PMEMoid cur_oid = pmemobj_oid((void*) cur); pmemobj_free(&cur_oid); } } #endif pmemobj_free(&(hashtable->table)); - PMEMoid ht_oid = pmemobj_oid(hashtable); + PMEMoid ht_oid = pmemobj_oid((void*) hashtable); pmemobj_free(&ht_oid); return 1; @@ -284,13 +284,19 @@ clht_gc_release(clht_hashtable_t* hashtable) { volatile bucket_t* cur = bucket; bucket = bucket->next; - ssmem_release(clht_alloc, (void*) cur); + // ssmem_release(clht_alloc, (void*) cur); + PMEMoid cur_oid = pmemobj_oid((void*) cur); + pmemobj_free(&cur_oid); } } #endif - ssmem_release(clht_alloc, pmemobj_direct(hashtable->table)); - ssmem_release(clht_alloc, hashtable); + // ssmem_release(clht_alloc, pmemobj_direct(hashtable->table)); + // ssmem_release(clht_alloc, hashtable); + + pmemobj_free(&(hashtable->table)); + PMEMoid ht_oid = pmemobj_oid((void*) hashtable); + pmemobj_free(&ht_oid); return 1; } diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 93336c28..4b56e84b 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -718,8 +718,8 @@ ht_resize_pes(clht_t* h, int is_increase, int by) num_buckets_new = ht_old->num_buckets / CLHT_RATIO_HALVE; } - /* printf("// resizing: from %8zu to %8zu buckets\n", ht_old->num_buckets, num_buckets_new); */ - + printf("// resizing: from %8zu to %8zu buckets\n", ht_old->num_buckets, num_buckets_new); + clht_hashtable_t* ht_new = clht_hashtable_create(num_buckets_new); ht_new->version = ht_old->version + 1; From 6a5a4d4cd02f8022d5db1f192defd0ef5a141325 Mon Sep 17 00:00:00 2001 From: pyrito Date: Tue, 17 Mar 2020 01:03:12 -0500 Subject: [PATCH 07/30] Fixed PMEMpool size, still having segfault --- P-CLHT/src/clht_gc.c | 2 +- P-CLHT/src/clht_lb_res.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/P-CLHT/src/clht_gc.c b/P-CLHT/src/clht_gc.c index 567c2dd4..ebfca9ae 100644 --- a/P-CLHT/src/clht_gc.c +++ b/P-CLHT/src/clht_gc.c @@ -48,7 +48,7 @@ clht_gc_thread_init(clht_t* h, int id) assert(ts != NULL); clht_hashtable_t* ht_ptr = pmemobj_direct(h->ht); - + printf("ht_ptr: %p\n", ht_ptr); ts->version = ht_ptr->version; ts->id = id; diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 4b56e84b..8cf388f5 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -230,12 +230,13 @@ clht_hashtable_t* clht_hashtable_create(uint64_t num_buckets); clht_create(uint64_t num_buckets) { // Open the PMEMpool if it exists, otherwise create it. 
+ size_t pool_size = 2*1024*1024*1024UL; if( access("/mnt/pmem/pool", F_OK ) != -1 ) { pop = pmemobj_open("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht)); } else { - pop = pmemobj_create("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht), PMEMOBJ_MIN_POOL, 0666); + pop = pmemobj_create("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht), pool_size, 0666); } if (pop == NULL) From b3ead0ce50e96b176d69b3073e68172f666e2e47 Mon Sep 17 00:00:00 2001 From: pyrito Date: Wed, 18 Mar 2020 20:08:02 -0500 Subject: [PATCH 08/30] Solved seg fault for hashtable. WIP --- P-CLHT/src/clht_gc.c | 17 ++++++++++++----- P-CLHT/src/clht_lb_res.c | 23 +++++++++++++++-------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/P-CLHT/src/clht_gc.c b/P-CLHT/src/clht_gc.c index ebfca9ae..3a9b675d 100644 --- a/P-CLHT/src/clht_gc.c +++ b/P-CLHT/src/clht_gc.c @@ -49,6 +49,8 @@ clht_gc_thread_init(clht_t* h, int id) clht_hashtable_t* ht_ptr = pmemobj_direct(h->ht); printf("ht_ptr: %p\n", ht_ptr); + // printf("ht_offset: %llu\n", h->ht.off); + // printf("ht_uuid: %llu\n", h->ht.pool_uuid_lo); ts->version = ht_ptr->version; ts->id = id; @@ -226,8 +228,10 @@ clht_gc_free(clht_hashtable_t* hashtable) for (bin = 0; bin < num_buckets; bin++) { bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + if (bin == 0) + printf("bucket: %p\n", bucket); bucket = bucket->next; - // printf("before the inner while\n"); + while (bucket != NULL) { volatile bucket_t* cur = bucket; @@ -255,7 +259,8 @@ clht_gc_destroy(clht_t* hashtable) #if !defined(CLHT_LINKED) clht_gc_collect_all(hashtable); clht_gc_free(pmemobj_direct(hashtable->ht)); - free(hashtable); + PMEMoid ht_oid = pmemobj_oid((void*) hashtable); + pmemobj_free(&ht_oid); #endif // ssmem_alloc_term(clht_alloc); @@ -277,9 +282,12 @@ clht_gc_release(clht_hashtable_t* hashtable) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) - { + { bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + if (bin == 0) + printf("bucket: %p\n", bucket); bucket = bucket->next; + while (bucket != NULL) { volatile bucket_t* cur = bucket; @@ -288,12 +296,11 @@ clht_gc_release(clht_hashtable_t* hashtable) PMEMoid cur_oid = pmemobj_oid((void*) cur); pmemobj_free(&cur_oid); } - } + } #endif // ssmem_release(clht_alloc, pmemobj_direct(hashtable->table)); // ssmem_release(clht_alloc, hashtable); - pmemobj_free(&(hashtable->table)); PMEMoid ht_oid = pmemobj_oid((void*) hashtable); pmemobj_free(&ht_oid); diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 8cf388f5..64388cb9 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -379,7 +379,7 @@ clht_get(clht_hashtable_t* hashtable, clht_addr_t key) size_t bin = clht_hash(hashtable, key); CLHT_GC_HT_VERSION_USED(hashtable); volatile bucket_t* bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; - + uint32_t j; do { @@ -694,7 +694,7 @@ ht_resize_help(clht_hashtable_t* h) int ht_resize_pes(clht_t* h, int is_increase, int by) { -// ticks s = getticks(); + ticks s = getticks(); check_ht_status_steps = CLHT_STATUS_INVOK; @@ -752,7 +752,7 @@ ht_resize_pes(clht_t* h, int is_increase, int by) size_t b; for (b = 0; b < ht_old->num_buckets; b++) { - bucket_t* bu_cur = ht_old->table + b; + bucket_t* bu_cur = (bucket_t*)(pmemobj_direct(ht_old->table)) + b; int ret = bucket_cpy(h, bu_cur, ht_new); if (ret == -1) return -1; @@ -812,7 +812,14 @@ ht_resize_pes(clht_t* h, int is_increase, int by) #endif // atomically swap the root pointer - SWAP_U64((uint64_t*) h, (uint64_t) ht_new); + // are there any race conditions? 
+ PMEMoid ht_new_oid = pmemobj_oid(ht_new); + uint64_t ht_new_oid_off = ht_new_oid.off; + uint64_t h_new = (uint64_t)h + sizeof(uint64_t); + + //SWAP_U64((uint64_t*) h, (uint64_t) ht_new); + SWAP_U64((uint64_t*)h_new, ht_new_oid_off); + clflush((char *)h, sizeof(uint64_t), true); #if defined(CRASH_AFTER_SWAP_CLHT) @@ -848,10 +855,10 @@ ht_resize_pes(clht_t* h, int is_increase, int by) ht_old->table_new = ht_new; TRYLOCK_RLS(h->resize_lock); -// ticks e = getticks() - s; -// double mba = (ht_new->num_buckets * 64) / (1024.0 * 1024); -// printf("[RESIZE-%02d] to #bu %7zu = MB: %7.2f | took: %13llu ti = %8.6f s\n", -// clht_gc_get_id(), ht_new->num_buckets, mba, (unsigned long long) e, e / 2.1e9); + ticks e = getticks() - s; + double mba = (ht_new->num_buckets * 64) / (1024.0 * 1024); + printf("[RESIZE-%02d] to #bu %7zu = MB: %7.2f | took: %13llu ti = %8.6f s\n", + clht_gc_get_id(), ht_new->num_buckets, mba, (unsigned long long) e, e / 2.1e9); #if defined(CLHTDEBUG) DEBUG_PRINT("-------------ht old------------\n"); From b859a6bca933e8d81913883d8a1854dfa0f13844 Mon Sep 17 00:00:00 2001 From: pyrito Date: Wed, 18 Mar 2020 20:21:00 -0500 Subject: [PATCH 09/30] Replicated bug --- P-CLHT/src/clht_gc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/P-CLHT/src/clht_gc.c b/P-CLHT/src/clht_gc.c index 3a9b675d..e487f273 100644 --- a/P-CLHT/src/clht_gc.c +++ b/P-CLHT/src/clht_gc.c @@ -292,18 +292,18 @@ clht_gc_release(clht_hashtable_t* hashtable) { volatile bucket_t* cur = bucket; bucket = bucket->next; - // ssmem_release(clht_alloc, (void*) cur); - PMEMoid cur_oid = pmemobj_oid((void*) cur); - pmemobj_free(&cur_oid); + ssmem_release(clht_alloc, (void*) cur); + // PMEMoid cur_oid = pmemobj_oid((void*) cur); + // pmemobj_free(&cur_oid); } } #endif - // ssmem_release(clht_alloc, pmemobj_direct(hashtable->table)); - // ssmem_release(clht_alloc, hashtable); - pmemobj_free(&(hashtable->table)); - PMEMoid ht_oid = pmemobj_oid((void*) hashtable); - pmemobj_free(&ht_oid); + ssmem_release(clht_alloc, pmemobj_direct(hashtable->table)); + ssmem_release(clht_alloc, hashtable); + // pmemobj_free(&(hashtable->table)); + // PMEMoid ht_oid = pmemobj_oid((void*) hashtable); + // pmemobj_free(&ht_oid); return 1; } From 5ec9a474a4780154928af4a2db2761e06bfe112f Mon Sep 17 00:00:00 2001 From: pyrito Date: Sat, 21 Mar 2020 16:03:52 -0500 Subject: [PATCH 10/30] Fixed seg fault, still issue with ssmem --- P-CLHT/example.cpp | 56 ++++++++++++------------- P-CLHT/include/clht_lb_res.h | 4 +- P-CLHT/src/clht_gc.c | 24 ++++------- P-CLHT/src/clht_lb_res.c | 79 +++++++++++++++++++++++++----------- 4 files changed, 95 insertions(+), 68 deletions(-) diff --git a/P-CLHT/example.cpp b/P-CLHT/example.cpp index a6a7806a..f8321e1b 100644 --- a/P-CLHT/example.cpp +++ b/P-CLHT/example.cpp @@ -59,7 +59,7 @@ void run(char **argv) { printf("operation,n,ops/s\n"); - clht_t *hashtable = clht_create(512); + clht_t *hashtable = clht_open(512); barrier_init(&barrier, num_thread); @@ -67,39 +67,39 @@ void run(char **argv) { std::atomic next_thread_id; - { - // Load - auto starttime = std::chrono::system_clock::now(); - next_thread_id.store(0); - auto func = [&]() { - int thread_id = next_thread_id.fetch_add(1); - tds[thread_id].id = thread_id; - tds[thread_id].ht = hashtable; + // { + // // Load + // auto starttime = std::chrono::system_clock::now(); + // next_thread_id.store(0); + // auto func = [&]() { + // int thread_id = next_thread_id.fetch_add(1); + // tds[thread_id].id = thread_id; + // tds[thread_id].ht = 
hashtable; - uint64_t start_key = n / num_thread * (uint64_t)thread_id; - uint64_t end_key = start_key + n / num_thread; + // uint64_t start_key = n / num_thread * (uint64_t)thread_id; + // uint64_t end_key = start_key + n / num_thread; - clht_gc_thread_init(tds[thread_id].ht, tds[thread_id].id); - barrier_cross(&barrier); + // clht_gc_thread_init(tds[thread_id].ht, tds[thread_id].id); + // barrier_cross(&barrier); - for (uint64_t i = start_key; i < end_key; i++) { - clht_put(tds[thread_id].ht, keys[i], keys[i]); - } - }; + // for (uint64_t i = start_key; i < end_key; i++) { + // clht_put(tds[thread_id].ht, keys[i], keys[i]); + // } + // }; - std::vector thread_group; + // std::vector thread_group; - for (int i = 0; i < num_thread; i++) - thread_group.push_back(std::thread{func}); + // for (int i = 0; i < num_thread; i++) + // thread_group.push_back(std::thread{func}); - for (int i = 0; i < num_thread; i++) - thread_group[i].join(); - auto duration = std::chrono::duration_cast( - std::chrono::system_clock::now() - starttime); - printf("Throughput: load, %f ,ops/us\n", (n * 1.0) / duration.count()); - } + // for (int i = 0; i < num_thread; i++) + // thread_group[i].join(); + // auto duration = std::chrono::duration_cast( + // std::chrono::system_clock::now() - starttime); + // printf("Throughput: load, %f ,ops/us\n", (n * 1.0) / duration.count()); + // } - barrier.crossing = 0; + // barrier.crossing = 0; { // Run @@ -138,7 +138,7 @@ void run(char **argv) { std::chrono::system_clock::now() - starttime); printf("Throughput: run, %f ,ops/us\n", (n * 1.0) / duration.count()); } - clht_gc_destroy(hashtable); + //clht_gc_destroy(hashtable); delete[] keys; } diff --git a/P-CLHT/include/clht_lb_res.h b/P-CLHT/include/clht_lb_res.h index 01101cef..5984e2f7 100644 --- a/P-CLHT/include/clht_lb_res.h +++ b/P-CLHT/include/clht_lb_res.h @@ -161,7 +161,8 @@ typedef struct ALIGNED(CACHE_LINE_SIZE) bucket_s volatile uint32_t hops; clht_addr_t key[ENTRIES_PER_BUCKET]; clht_val_t val[ENTRIES_PER_BUCKET]; - volatile struct bucket_s* next; + // volatile struct bucket_s* next; + PMEMoid next; } bucket_t; //#if __GNUC__ > 4 && __GNUC_MINOR__ > 4 @@ -409,6 +410,7 @@ lock_acq_rtm_chk_resize(clht_lock_t* lock, clht_hashtable_t* h) /* Create a new hashtable. */ clht_hashtable_t* clht_hashtable_create(uint64_t num_buckets); clht_t* clht_create(uint64_t num_buckets); +clht_t* clht_open(uint64_t num_buckets); /* Insert a key-value pair into a hashtable. 
*/ int clht_put(clht_t* hashtable, clht_addr_t key, clht_val_t val); diff --git a/P-CLHT/src/clht_gc.c b/P-CLHT/src/clht_gc.c index e487f273..0b62017e 100644 --- a/P-CLHT/src/clht_gc.c +++ b/P-CLHT/src/clht_gc.c @@ -48,9 +48,6 @@ clht_gc_thread_init(clht_t* h, int id) assert(ts != NULL); clht_hashtable_t* ht_ptr = pmemobj_direct(h->ht); - printf("ht_ptr: %p\n", ht_ptr); - // printf("ht_offset: %llu\n", h->ht.off); - // printf("ht_uuid: %llu\n", h->ht.pool_uuid_lo); ts->version = ht_ptr->version; ts->id = id; @@ -228,15 +225,12 @@ clht_gc_free(clht_hashtable_t* hashtable) for (bin = 0; bin < num_buckets; bin++) { bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; - if (bin == 0) - printf("bucket: %p\n", bucket); - bucket = bucket->next; + bucket = pmemobj_direct(bucket->next); while (bucket != NULL) { volatile bucket_t* cur = bucket; - bucket = bucket->next; - // printf("CUR: %p\n", cur); + bucket = pmemobj_direct(bucket->next); PMEMoid cur_oid = pmemobj_oid((void*) cur); pmemobj_free(&cur_oid); } @@ -259,12 +253,12 @@ clht_gc_destroy(clht_t* hashtable) #if !defined(CLHT_LINKED) clht_gc_collect_all(hashtable); clht_gc_free(pmemobj_direct(hashtable->ht)); - PMEMoid ht_oid = pmemobj_oid((void*) hashtable); - pmemobj_free(&ht_oid); + // PMEMoid ht_oid = pmemobj_oid((void*) hashtable); + // pmemobj_free(&ht_oid); #endif - // ssmem_alloc_term(clht_alloc); - free(clht_alloc); + // ssmem_alloc_term(clht_alloc); + //free(clht_alloc); } /* @@ -284,14 +278,12 @@ clht_gc_release(clht_hashtable_t* hashtable) for (bin = 0; bin < num_buckets; bin++) { bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; - if (bin == 0) - printf("bucket: %p\n", bucket); - bucket = bucket->next; + bucket = pmemobj_direct(bucket->next); while (bucket != NULL) { volatile bucket_t* cur = bucket; - bucket = bucket->next; + bucket = pmemobj_direct(bucket->next); ssmem_release(clht_alloc, (void*) cur); // PMEMoid cur_oid = pmemobj_oid((void*) cur); // pmemobj_free(&cur_oid); diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 64388cb9..b76f49c6 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -172,8 +172,8 @@ static inline void clflush_next_check(char *data, int len, bool fence) #elif CLWB asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)(ptr))); #endif - if (((bucket_t *)data)->next) - clflush_next_check((char *)(((bucket_t *)data)->next), sizeof(bucket_t), false); + if (pmemobj_direct( ((bucket_t *)data)->next ) ) + clflush_next_check((char *)pmemobj_direct( ((bucket_t *)data)->next ), sizeof(bucket_t), false); while(read_tsc() < etsc) cpu_pause(); } if (fence) @@ -205,7 +205,7 @@ clht_bucket_create() { bucket->key[j] = 0; } - bucket->next = NULL; + bucket->next = OID_NULL; return bucket; } @@ -226,6 +226,34 @@ clht_bucket_create_stats(clht_hashtable_t* h, int* resize) clht_hashtable_t* clht_hashtable_create(uint64_t num_buckets); +clht_t* clht_open(uint64_t num_buckets) { + size_t pool_size = 2*1024*1024*1024UL; + if( access("/mnt/pmem/pool", F_OK ) != -1 ) + { + pop = pmemobj_open("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht)); + // reload the root + } else + { + perror("Pool does not already exist\n"); + } + + if (pop == NULL) + { + perror("failed to open the pool\n"); + } + + // Create the root pointer + PMEMoid my_root = pmemobj_root(pop, sizeof(clht_t)); + if (pmemobj_direct(my_root) == NULL) + { + perror("root pointer is null\n"); + } + + clht_t* w = pmemobj_direct(my_root); + printf("my_root.off: %d\n", my_root.off); + return w; +} + clht_t* 
clht_create(uint64_t num_buckets) { @@ -234,6 +262,7 @@ clht_create(uint64_t num_buckets) if( access("/mnt/pmem/pool", F_OK ) != -1 ) { pop = pmemobj_open("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht)); + // reload the root } else { pop = pmemobj_create("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht), pool_size, 0666); @@ -252,6 +281,7 @@ clht_create(uint64_t num_buckets) } clht_t* w = pmemobj_direct(my_root); + printf("my_root.off: %d\n", my_root.off); if (w == NULL) { @@ -260,7 +290,7 @@ clht_create(uint64_t num_buckets) } clht_hashtable_t* ht_ptr = clht_hashtable_create(num_buckets); - + printf("clht_create ht_ptr->table.off: %d\n", ht_ptr->table.off); w->ht = pmemobj_oid(ht_ptr); if (ht_ptr == NULL) @@ -275,7 +305,7 @@ clht_create(uint64_t num_buckets) w->version_min = 0; w->ht_oldest = ht_ptr; - // Make sure to change the flushing to the correct offset given the pointer + // This should flush everything to persistent memory clflush((char *)pmemobj_direct(ht_ptr->table), num_buckets * sizeof(bucket_t), true); clflush((char *)ht_ptr, sizeof(clht_hashtable_t), true); clflush((char *)w, sizeof(clht_t), true); @@ -401,8 +431,7 @@ clht_get(clht_hashtable_t* hashtable, clht_addr_t key) } } } - - bucket = bucket->next; + bucket = pmemobj_direct(bucket->next); } while (unlikely(bucket != NULL)); return 0; @@ -421,7 +450,7 @@ bucket_exists(volatile bucket_t* bucket, clht_addr_t key) return true; } } - bucket = bucket->next; + bucket = pmemobj_direct(bucket->next); } while (unlikely(bucket != NULL)); return false; @@ -475,7 +504,7 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) } int resize = 0; - if (likely(bucket->next == NULL)) + if (likely(pmemobj_direct(bucket->next) == NULL)) { if (unlikely(empty == NULL)) { @@ -493,8 +522,10 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) _mm_sfence(); #endif clflush((char *)b, sizeof(bucket_t), true); - bucket->next = b; - clflush((char *)&bucket->next, sizeof(uintptr_t), true); + bucket->next = pmemobj_oid(b); + bucket_t* next_ptr = pmemobj_direct(bucket->next); + // ??? doubt ??? 
+ clflush((char *)&next_ptr, sizeof(uintptr_t), true); } else { @@ -520,7 +551,7 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) } return true; } - bucket = bucket->next; + bucket = pmemobj_direct(bucket->next); } while (true); } @@ -568,7 +599,7 @@ clht_remove(clht_t* h, clht_addr_t key) return val; } } - bucket = bucket->next; + bucket = pmemobj_direct(bucket->next); } while (unlikely(bucket != NULL)); LOCK_RLS(lock); @@ -593,17 +624,19 @@ clht_put_seq(clht_hashtable_t* hashtable, clht_addr_t key, clht_val_t val, uint6 } } - if (bucket->next == NULL) + if (pmemobj_direct(bucket->next) == NULL) { DPP(put_num_failed_expand); int null; - bucket->next = clht_bucket_create_stats(hashtable, &null); - bucket->next->val[0] = val; - bucket->next->key[0] = key; + + bucket->next = pmemobj_oid(clht_bucket_create_stats(hashtable, &null)); + bucket_t* bucket_ptr = pmemobj_direct(bucket->next); + bucket_ptr->val[0] = val; + bucket_ptr->key[0] = key; return true; } - bucket = bucket->next; + bucket = pmemobj_direct(bucket->next); } while (true); } @@ -657,7 +690,7 @@ bucket_cpy(clht_t* h, volatile bucket_t* bucket, clht_hashtable_t* ht_new) clht_put_seq(ht_new, key, bucket->val[j], bin); } } - bucket = bucket->next; + bucket = pmemobj_direct(bucket->next); } while (bucket != NULL); @@ -906,7 +939,7 @@ clht_size(clht_hashtable_t* hashtable) } } - bucket = bucket->next; + bucket = pmemobj_direct(bucket->next); } while (bucket != NULL); } @@ -950,7 +983,7 @@ ht_status(clht_t* h, int resize_increase, int just_print) } } - bucket = bucket->next; + bucket = pmemobj_direct(bucket->next); } while (bucket != NULL); @@ -1064,7 +1097,7 @@ clht_print(clht_hashtable_t* hashtable) } } - bucket = bucket->next; + bucket = pmemobj_direct(bucket->next); printf(" ** -> "); } while (bucket != NULL); @@ -1087,7 +1120,7 @@ void clht_lock_initialization(clht_t *h) for (i = 0; i < ht->num_buckets; i++) { bucket_t* temp = pmemobj_direct(ht->table); temp[i].lock = LOCK_FREE; - for (next = temp[i].next; next != NULL; next = next->next) { + for (next = pmemobj_direct(temp[i].next); next != NULL; next = pmemobj_direct(next->next)) { next->lock = LOCK_FREE; } } From 2e07acae31cc3f81ad7586cbba7cdef98c19591f Mon Sep 17 00:00:00 2001 From: pyrito Date: Sat, 21 Mar 2020 16:05:41 -0500 Subject: [PATCH 11/30] Changed clht_open func param --- P-CLHT/example.cpp | 2 +- P-CLHT/include/clht_lb_res.h | 2 +- P-CLHT/src/clht_lb_res.c | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/P-CLHT/example.cpp b/P-CLHT/example.cpp index f8321e1b..8cce997e 100644 --- a/P-CLHT/example.cpp +++ b/P-CLHT/example.cpp @@ -59,7 +59,7 @@ void run(char **argv) { printf("operation,n,ops/s\n"); - clht_t *hashtable = clht_open(512); + clht_t *hashtable = clht_open(); barrier_init(&barrier, num_thread); diff --git a/P-CLHT/include/clht_lb_res.h b/P-CLHT/include/clht_lb_res.h index 5984e2f7..6468a5cf 100644 --- a/P-CLHT/include/clht_lb_res.h +++ b/P-CLHT/include/clht_lb_res.h @@ -410,7 +410,7 @@ lock_acq_rtm_chk_resize(clht_lock_t* lock, clht_hashtable_t* h) /* Create a new hashtable. */ clht_hashtable_t* clht_hashtable_create(uint64_t num_buckets); clht_t* clht_create(uint64_t num_buckets); -clht_t* clht_open(uint64_t num_buckets); +clht_t* clht_open(); /* Insert a key-value pair into a hashtable. 
*/ int clht_put(clht_t* hashtable, clht_addr_t key, clht_val_t val); diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index b76f49c6..00d2f4e7 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -226,12 +226,11 @@ clht_bucket_create_stats(clht_hashtable_t* h, int* resize) clht_hashtable_t* clht_hashtable_create(uint64_t num_buckets); -clht_t* clht_open(uint64_t num_buckets) { +clht_t* clht_open() { size_t pool_size = 2*1024*1024*1024UL; if( access("/mnt/pmem/pool", F_OK ) != -1 ) { pop = pmemobj_open("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht)); - // reload the root } else { perror("Pool does not already exist\n"); From fd33b64d7c3b1d041d23ee50fbfb4321c94880d5 Mon Sep 17 00:00:00 2001 From: pyrito Date: Mon, 23 Mar 2020 17:49:58 -0500 Subject: [PATCH 12/30] First implementation of transactions --- P-CLHT/example.cpp | 56 +++++++++++++++--------------- P-CLHT/src/clht_lb_res.c | 74 ++++++++++++++++++++++++++-------------- 2 files changed, 76 insertions(+), 54 deletions(-) diff --git a/P-CLHT/example.cpp b/P-CLHT/example.cpp index 8cce997e..a6a7806a 100644 --- a/P-CLHT/example.cpp +++ b/P-CLHT/example.cpp @@ -59,7 +59,7 @@ void run(char **argv) { printf("operation,n,ops/s\n"); - clht_t *hashtable = clht_open(); + clht_t *hashtable = clht_create(512); barrier_init(&barrier, num_thread); @@ -67,39 +67,39 @@ void run(char **argv) { std::atomic next_thread_id; - // { - // // Load - // auto starttime = std::chrono::system_clock::now(); - // next_thread_id.store(0); - // auto func = [&]() { - // int thread_id = next_thread_id.fetch_add(1); - // tds[thread_id].id = thread_id; - // tds[thread_id].ht = hashtable; + { + // Load + auto starttime = std::chrono::system_clock::now(); + next_thread_id.store(0); + auto func = [&]() { + int thread_id = next_thread_id.fetch_add(1); + tds[thread_id].id = thread_id; + tds[thread_id].ht = hashtable; - // uint64_t start_key = n / num_thread * (uint64_t)thread_id; - // uint64_t end_key = start_key + n / num_thread; + uint64_t start_key = n / num_thread * (uint64_t)thread_id; + uint64_t end_key = start_key + n / num_thread; - // clht_gc_thread_init(tds[thread_id].ht, tds[thread_id].id); - // barrier_cross(&barrier); + clht_gc_thread_init(tds[thread_id].ht, tds[thread_id].id); + barrier_cross(&barrier); - // for (uint64_t i = start_key; i < end_key; i++) { - // clht_put(tds[thread_id].ht, keys[i], keys[i]); - // } - // }; + for (uint64_t i = start_key; i < end_key; i++) { + clht_put(tds[thread_id].ht, keys[i], keys[i]); + } + }; - // std::vector thread_group; + std::vector thread_group; - // for (int i = 0; i < num_thread; i++) - // thread_group.push_back(std::thread{func}); + for (int i = 0; i < num_thread; i++) + thread_group.push_back(std::thread{func}); - // for (int i = 0; i < num_thread; i++) - // thread_group[i].join(); - // auto duration = std::chrono::duration_cast( - // std::chrono::system_clock::now() - starttime); - // printf("Throughput: load, %f ,ops/us\n", (n * 1.0) / duration.count()); - // } + for (int i = 0; i < num_thread; i++) + thread_group[i].join(); + auto duration = std::chrono::duration_cast( + std::chrono::system_clock::now() - starttime); + printf("Throughput: load, %f ,ops/us\n", (n * 1.0) / duration.count()); + } - // barrier.crossing = 0; + barrier.crossing = 0; { // Run @@ -138,7 +138,7 @@ void run(char **argv) { std::chrono::system_clock::now() - starttime); printf("Throughput: run, %f ,ops/us\n", (n * 1.0) / duration.count()); } - //clht_gc_destroy(hashtable); + clht_gc_destroy(hashtable); delete[] 
keys; } diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 00d2f4e7..2432e3ce 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -288,9 +288,16 @@ clht_create(uint64_t num_buckets) return NULL; } - clht_hashtable_t* ht_ptr = clht_hashtable_create(num_buckets); - printf("clht_create ht_ptr->table.off: %d\n", ht_ptr->table.off); - w->ht = pmemobj_oid(ht_ptr); + clht_hashtable_t* ht_ptr; + + // Transactional allocation + TX_BEGIN(pop) { + ht_ptr = clht_hashtable_create(num_buckets); + printf("clht_create ht_ptr->table.off: %d\n", ht_ptr->table.off); + w->ht = pmemobj_oid(ht_ptr); + } TX_ONABORT { + printf("Failed clht_hashtable_create, rolling back\n"); + } TX_END; if (ht_ptr == NULL) { @@ -330,7 +337,7 @@ clht_hashtable_create(uint64_t num_buckets) if (pmemobj_alloc(pop, &ht_oid, sizeof(clht_hashtable_t), 0, 0, 0)) { fprintf(stderr, "pmemobj_alloc failed for clht_hashtable_create\n"); assert(0); - } + } hashtable = pmemobj_direct(ht_oid); if (hashtable == NULL) @@ -345,7 +352,7 @@ clht_hashtable_create(uint64_t num_buckets) if (pmemobj_alloc(pop, &table_oid, num_buckets * sizeof(bucket_t), 0, 0, 0)) { fprintf(stderr, "pmemobj_alloc failed for table_oid in clht_hashtable_create\n"); assert(0); - } + } hashtable->table = table_oid; bucket_t* bucket_ptr = pmemobj_direct(hashtable->table); @@ -508,23 +515,26 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) if (unlikely(empty == NULL)) { DPP(put_num_failed_expand); - - bucket_t* b = clht_bucket_create_stats(hashtable, &resize); - b->val[0] = val; + TX_BEGIN(pop) { + bucket_t* b = clht_bucket_create_stats(hashtable, &resize); + b->val[0] = val; #ifdef __tile__ - /* keep the writes in order */ - _mm_sfence(); + /* keep the writes in order */ + _mm_sfence(); #endif - b->key[0] = key; + b->key[0] = key; #ifdef __tile__ - /* make sure they are visible */ - _mm_sfence(); + /* make sure they are visible */ + _mm_sfence(); #endif - clflush((char *)b, sizeof(bucket_t), true); - bucket->next = pmemobj_oid(b); - bucket_t* next_ptr = pmemobj_direct(bucket->next); - // ??? doubt ??? 
- clflush((char *)&next_ptr, sizeof(uintptr_t), true); + clflush((char *)b, sizeof(bucket_t), true); + bucket->next = pmemobj_oid(b); + bucket_t* next_ptr = pmemobj_direct(bucket->next); + + clflush((char *)&next_ptr, sizeof(uintptr_t), true); + } TX_ONABORT { + printf("Failed clht_put, rolling back\n"); + } TX_END } else { @@ -627,11 +637,14 @@ clht_put_seq(clht_hashtable_t* hashtable, clht_addr_t key, clht_val_t val, uint6 { DPP(put_num_failed_expand); int null; - - bucket->next = pmemobj_oid(clht_bucket_create_stats(hashtable, &null)); - bucket_t* bucket_ptr = pmemobj_direct(bucket->next); - bucket_ptr->val[0] = val; - bucket_ptr->key[0] = key; + TX_BEGIN(pop) { + bucket->next = pmemobj_oid(clht_bucket_create_stats(hashtable, &null)); + bucket_t* bucket_ptr = pmemobj_direct(bucket->next); + bucket_ptr->val[0] = val; + bucket_ptr->key[0] = key; + } TX_ONABORT { + printf("Failed clht_put_seq, rolling back\n"); + } TX_END return true; } @@ -783,7 +796,7 @@ ht_resize_pes(clht_t* h, int is_increase, int by) size_t b; for (b = 0; b < ht_old->num_buckets; b++) - { + { bucket_t* bu_cur = (bucket_t*)(pmemobj_direct(ht_old->table)) + b; int ret = bucket_cpy(h, bu_cur, ht_new); if (ret == -1) @@ -1005,7 +1018,11 @@ ht_status(clht_t* h, int resize_increase, int just_print) { // printf("[STATUS-%02d] #bu: %7zu / #elems: %7zu / full%%: %8.4f%% / expands: %4d / max expands: %2d\n", // clht_gc_get_id(), hashtable->num_buckets, size, full_ratio, expands, expands_max); - ht_resize_pes(h, 0, 33); + TX_BEGIN(pop) { + ht_resize_pes(h, 0, 33); + } TX_ONABORT { + printf("Failed ht_resize_pes, rolling back\n"); + } TX_END } else if ((full_ratio > 0 && full_ratio > CLHT_PERC_FULL_DOUBLE) || expands_max > CLHT_MAX_EXPANSIONS || resize_increase) @@ -1020,7 +1037,12 @@ ht_status(clht_t* h, int resize_increase, int just_print) inc_by_pow2 = 2; } DEBUG_PRINT("Callig ht_resize_pes\n"); - int ret = ht_resize_pes(h, 1, inc_by_pow2); + int ret = 0; + TX_BEGIN(pop) { + ret = ht_resize_pes(h, 1, inc_by_pow2); + } TX_ONABORT { + printf("Failed ht_resize_pes, rolling back\n"); + } TX_END; // return if crashed if (ret == -1) return 0; From 824f5e2714b3c8e4103d567d9dc56d1bf7fbff2c Mon Sep 17 00:00:00 2001 From: pyrito Date: Mon, 30 Mar 2020 21:36:05 -0500 Subject: [PATCH 13/30] Trimmed use of transactions. WIP. 
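Context for the trimming: in libpmemobj, TX_BEGIN/TX_END only undoes stores to memory that was snapshotted inside the transaction (via pmemobj_tx_add_range and friends) or allocated with the transactional allocators, so wrapping plain assignments in a transaction does not by itself make them roll back on abort. The fragment below is a minimal, generic illustration of that idiom; it is not taken from this patch, and it reuses only the clht_hashtable_t type and its version field from the header shown earlier.

```
/* Minimal sketch of the libpmemobj transaction idiom (illustration only).
 * Assumes `pop` is an open PMEMobjpool and `ht_oid` refers to a
 * clht_hashtable_t living in that pool. */
#include <stddef.h>
#include <libpmemobj.h>
#include "clht_lb_res.h"

static int bump_version(PMEMobjpool *pop, PMEMoid ht_oid)
{
    int ret = 0;
    clht_hashtable_t *ht = pmemobj_direct(ht_oid);

    TX_BEGIN(pop) {
        /* Snapshot the field before modifying it so an abort undoes the store. */
        pmemobj_tx_add_range(ht_oid, offsetof(clht_hashtable_t, version),
                             sizeof(ht->version));
        ht->version++;          /* rolled back automatically if the tx aborts */
    } TX_ONABORT {
        ret = -1;               /* e.g. out of pool space or explicit abort */
    } TX_END

    return ret;
}
```

Note also that allocations made with the non-transactional pmemobj_alloc are not undone by a surrounding transaction abort; only pmemobj_tx_alloc and the other transactional allocators are.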
--- .vscode/settings.json | 4 +- P-CLHT/CMakeLists.txt | 7 ++ P-CLHT/src/clht_lb_res.c | 113 +++++++++++++++------------- P-CLHT/test.cpp | 155 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 226 insertions(+), 53 deletions(-) create mode 100644 P-CLHT/test.cpp diff --git a/.vscode/settings.json b/.vscode/settings.json index 2849a3c4..d9fa1439 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,6 +3,8 @@ "mutex": "c", "shared_mutex": "c", "condition_variable": "c", - "chrono": "cpp" + "chrono": "cpp", + "random": "c", + "algorithm": "c" } } \ No newline at end of file diff --git a/P-CLHT/CMakeLists.txt b/P-CLHT/CMakeLists.txt index ef380494..07b4bfab 100644 --- a/P-CLHT/CMakeLists.txt +++ b/P-CLHT/CMakeLists.txt @@ -52,3 +52,10 @@ add_executable(example ${P_CLHT_TEST}) target_link_libraries(example ${TbbLib} ${JemallocLib} ${LIBPMEMOBJ++_LIBRARIES} boost_system boost_thread pthread) + +set(P_CLHT_TEST test.cpp src/clht_lb_res.c src/clht_gc.c + external/sspfd/sspfd.c external/ssmem/src/ssmem.c) +add_executable(test ${P_CLHT_TEST}) + +target_link_libraries(test ${TbbLib} ${JemallocLib} ${LIBPMEMOBJ++_LIBRARIES} boost_system + boost_thread pthread) diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 2432e3ce..18d7566c 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -180,13 +180,27 @@ static inline void clflush_next_check(char *data, int len, bool fence) mfence(); } +static int bucket_init(PMEMobjpool *pop_arg, void *ptr, void *arg) +{ + bucket_t* bucket = ptr; + bucket->lock = 0; + + uint32_t j; + for (j = 0; j < ENTRIES_PER_BUCKET; j++) + { + bucket->key[j] = 0; + } + bucket->next = OID_NULL; + return 0; +} + /* Create a new bucket. */ bucket_t* clht_bucket_create() { bucket_t* bucket = NULL; PMEMoid bucket_oid; - if (pmemobj_alloc(pop, &bucket_oid, sizeof(bucket_t), 0, 0, 0)) + if (pmemobj_alloc(pop, &bucket_oid, sizeof(bucket_t), 0, bucket_init, 0)) { fprintf(stderr, "pmemobj_alloc failed for clht_bucket_create\n"); assert(0); @@ -197,16 +211,6 @@ clht_bucket_create() { return NULL; } - - bucket->lock = 0; - - uint32_t j; - for (j = 0; j < ENTRIES_PER_BUCKET; j++) - { - bucket->key[j] = 0; - } - bucket->next = OID_NULL; - return bucket; } @@ -288,12 +292,12 @@ clht_create(uint64_t num_buckets) return NULL; } - clht_hashtable_t* ht_ptr; + clht_hashtable_t* ht_ptr = clht_hashtable_create(num_buckets);; + printf("clht_create ht_ptr->table.off: %d\n", ht_ptr->table.off); // Transactional allocation TX_BEGIN(pop) { - ht_ptr = clht_hashtable_create(num_buckets); - printf("clht_create ht_ptr->table.off: %d\n", ht_ptr->table.off); + //pmemobj_tx_abort(-1); w->ht = pmemobj_oid(ht_ptr); } TX_ONABORT { printf("Failed clht_hashtable_create, rolling back\n"); @@ -316,7 +320,6 @@ clht_create(uint64_t num_buckets) clflush((char *)ht_ptr, sizeof(clht_hashtable_t), true); clflush((char *)w, sizeof(clht_t), true); - // pmemobj_close(pop); return w; } @@ -342,18 +345,18 @@ clht_hashtable_create(uint64_t num_buckets) if (hashtable == NULL) { - printf("** malloc @ hatshtalbe\n"); + printf("** malloc @ hashtable\n"); return NULL; } /* hashtable->table = calloc(num_buckets, (sizeof(bucket_t))); */ // hashtable->table = (bucket_t*) memalign(CACHE_LINE_SIZE, num_buckets * (sizeof(bucket_t))); PMEMoid table_oid; - if (pmemobj_alloc(pop, &table_oid, num_buckets * sizeof(bucket_t), 0, 0, 0)) { + hashtable->table = table_oid; + if (pmemobj_alloc(pop, &hashtable->table, num_buckets * sizeof(bucket_t), 0, 0, 0)) { fprintf(stderr, "pmemobj_alloc failed 
for table_oid in clht_hashtable_create\n"); assert(0); } - hashtable->table = table_oid; bucket_t* bucket_ptr = pmemobj_direct(hashtable->table); if (bucket_ptr == NULL) @@ -365,32 +368,33 @@ clht_hashtable_create(uint64_t num_buckets) memset(bucket_ptr, 0, num_buckets * (sizeof(bucket_t))); - uint64_t i; - for (i = 0; i < num_buckets; i++) - { - bucket_ptr[i].lock = LOCK_FREE; - uint32_t j; - for (j = 0; j < ENTRIES_PER_BUCKET; j++) + TX_BEGIN(pop) { + uint64_t i; + for (i = 0; i < num_buckets; i++) { - bucket_ptr[i].key[j] = 0; + bucket_ptr[i].lock = LOCK_FREE; + uint32_t j; + for (j = 0; j < ENTRIES_PER_BUCKET; j++) + { + bucket_ptr[i].key[j] = 0; + } } - } - - hashtable->num_buckets = num_buckets; - hashtable->hash = num_buckets - 1; - hashtable->version = 0; - hashtable->table_tmp = NULL; - hashtable->table_new = NULL; - hashtable->table_prev = NULL; - hashtable->num_expands = 0; - hashtable->num_expands_threshold = (CLHT_PERC_EXPANSIONS * num_buckets); - if (hashtable->num_expands_threshold == 0) - { - hashtable->num_expands_threshold = 1; - } - hashtable->is_helper = 1; - hashtable->helper_done = 0; + hashtable->num_buckets = num_buckets; + hashtable->hash = num_buckets - 1; + hashtable->version = 0; + hashtable->table_tmp = NULL; + hashtable->table_new = NULL; + hashtable->table_prev = NULL; + hashtable->num_expands = 0; + hashtable->num_expands_threshold = (CLHT_PERC_EXPANSIONS * num_buckets); + if (hashtable->num_expands_threshold == 0) + { + hashtable->num_expands_threshold = 1; + } + hashtable->is_helper = 1; + hashtable->helper_done = 0; + } TX_END return hashtable; } @@ -515,8 +519,8 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) if (unlikely(empty == NULL)) { DPP(put_num_failed_expand); + bucket_t* b = clht_bucket_create_stats(hashtable, &resize);; TX_BEGIN(pop) { - bucket_t* b = clht_bucket_create_stats(hashtable, &resize); b->val[0] = val; #ifdef __tile__ /* keep the writes in order */ @@ -527,14 +531,14 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) /* make sure they are visible */ _mm_sfence(); #endif + } TX_FINALLY { clflush((char *)b, sizeof(bucket_t), true); bucket->next = pmemobj_oid(b); - bucket_t* next_ptr = pmemobj_direct(bucket->next); - - clflush((char *)&next_ptr, sizeof(uintptr_t), true); } TX_ONABORT { printf("Failed clht_put, rolling back\n"); } TX_END + bucket_t* next_ptr = pmemobj_direct(bucket->next); + clflush((char *)&next_ptr, sizeof(uintptr_t), true); } else { @@ -602,8 +606,11 @@ clht_remove(clht_t* h, clht_addr_t key) if (bucket->key[j] == key) { clht_val_t val = bucket->val[j]; - bucket->key[j] = 0; - clflush((char *)&bucket->key[j], sizeof(uintptr_t), true); + // May not need this, if there is a crash, remove will not be persisted + TX_BEGIN(pop) { + bucket->key[j] = 0; + clflush((char *)&bucket->key[j], sizeof(uintptr_t), true); + } TX_END LOCK_RLS(lock); return val; } @@ -627,8 +634,10 @@ clht_put_seq(clht_hashtable_t* hashtable, clht_addr_t key, clht_val_t val, uint6 { if (bucket->key[j] == 0) { - bucket->val[j] = val; - bucket->key[j] = key; + TX_BEGIN(pop) { + bucket->val[j] = val; + bucket->key[j] = key; + } TX_END return true; } } @@ -637,9 +646,9 @@ clht_put_seq(clht_hashtable_t* hashtable, clht_addr_t key, clht_val_t val, uint6 { DPP(put_num_failed_expand); int null; + bucket->next = pmemobj_oid(clht_bucket_create()); + bucket_t* bucket_ptr = pmemobj_direct(bucket->next); TX_BEGIN(pop) { - bucket->next = pmemobj_oid(clht_bucket_create_stats(hashtable, &null)); - bucket_t* bucket_ptr = pmemobj_direct(bucket->next); 
bucket_ptr->val[0] = val; bucket_ptr->key[0] = key; } TX_ONABORT { @@ -744,7 +753,7 @@ ht_resize_pes(clht_t* h, int is_increase, int by) check_ht_status_steps = CLHT_STATUS_INVOK; clht_hashtable_t* ht_old = pmemobj_direct(h->ht); - + if (TRYLOCK_ACQ(&h->resize_lock)) { return 0; diff --git a/P-CLHT/test.cpp b/P-CLHT/test.cpp new file mode 100644 index 00000000..89d3d116 --- /dev/null +++ b/P-CLHT/test.cpp @@ -0,0 +1,155 @@ +#include +#include +#include +#include +#include +#include +#include "tbb/tbb.h" + +#include + +#include "clht_lb_res.h" +#include "ssmem.h" + +typedef struct thread_data { + uint32_t id; + clht_t *ht; +} thread_data_t; + +typedef struct barrier { + pthread_cond_t complete; + pthread_mutex_t mutex; + int count; + int crossing; +} barrier_t; + +void barrier_init(barrier_t *b, int n) { + pthread_cond_init(&b->complete, NULL); + pthread_mutex_init(&b->mutex, NULL); + b->count = n; + b->crossing = 0; +} + +void barrier_cross(barrier_t *b) { + pthread_mutex_lock(&b->mutex); + b->crossing++; + if (b->crossing < b->count) { + pthread_cond_wait(&b->complete, &b->mutex); + } else { + pthread_cond_broadcast(&b->complete); + b->crossing = 0; + } + pthread_mutex_unlock(&b->mutex); +} + +barrier_t barrier; + +void run(char **argv) { + std::cout << "Simple Example of P-CLHT" << std::endl; + + uint64_t n = std::atoll(argv[1]); + uint64_t *keys = new uint64_t[n]; + + // Generate keys + for (uint64_t i = 0; i < n; i++) { + keys[i] = i + 1; + } + + int num_thread = atoi(argv[2]); + + printf("operation,n,ops/s\n"); + + clht_t *hashtable = clht_create(512); + + barrier_init(&barrier, num_thread); + + thread_data_t *tds = (thread_data_t *) malloc(num_thread * sizeof(thread_data_t)); + + std::atomic next_thread_id; + + { + // Load + auto starttime = std::chrono::system_clock::now(); + next_thread_id.store(0); + auto func = [&]() { + int thread_id = next_thread_id.fetch_add(1); + tds[thread_id].id = thread_id; + tds[thread_id].ht = hashtable; + + uint64_t start_key = n / num_thread * (uint64_t)thread_id; + uint64_t end_key = start_key + n / num_thread; + + clht_gc_thread_init(tds[thread_id].ht, tds[thread_id].id); + barrier_cross(&barrier); + + for (uint64_t i = start_key; i < end_key; i++) { + // Simulate a crash while inserting some key + clht_put(tds[thread_id].ht, keys[i], keys[i]); + } + }; + + std::vector thread_group; + + for (int i = 0; i < num_thread; i++) + thread_group.push_back(std::thread{func}); + + for (int i = 0; i < num_thread; i++) + thread_group[i].join(); + auto duration = std::chrono::duration_cast( + std::chrono::system_clock::now() - starttime); + printf("Throughput: load, %f ,ops/us\n", (n * 1.0) / duration.count()); + } + + barrier.crossing = 0; + + { + // Run + auto starttime = std::chrono::system_clock::now(); + next_thread_id.store(0); + auto func = [&]() { + int thread_id = next_thread_id.fetch_add(1); + tds[thread_id].id = thread_id; + tds[thread_id].ht = hashtable; + + uint64_t start_key = n / num_thread * (uint64_t)thread_id; + uint64_t end_key = start_key + n / num_thread; + + clht_gc_thread_init(tds[thread_id].ht, tds[thread_id].id); + barrier_cross(&barrier); + + for (uint64_t i = start_key; i < end_key; i++) { + PMEMoid pmem_ht = (PMEMoid)((tds[thread_id].ht)->ht); + clht_hashtable_t *ht = (clht_hashtable_t*)pmemobj_direct(pmem_ht); + uintptr_t val = clht_get(ht, keys[i]); + if (val != keys[i]) { + std::cout << "[CLHT] wrong key read: " << val << "expected: " << keys[i] << std::endl; + exit(1); + } + } + }; + + std::vector thread_group; + + for (int i = 0; 
i < num_thread; i++) + thread_group.push_back(std::thread{func}); + + for (int i = 0; i < num_thread; i++) + thread_group[i].join(); + auto duration = std::chrono::duration_cast( + std::chrono::system_clock::now() - starttime); + printf("Throughput: run, %f ,ops/us\n", (n * 1.0) / duration.count()); + } + clht_gc_destroy(hashtable); + + delete[] keys; +} + +int main(int argc, char **argv) { + if (argc != 3) { + printf("usage: %s [n] [nthreads]\nn: number of keys (integer)\nnthreads: number of threads (integer)\n", argv[0]); + return 1; + } + + run(argv); + return 0; +} \ No newline at end of file From bd951e6d2127d36587f66fedb78d4cd63d754863 Mon Sep 17 00:00:00 2001 From: pyrito Date: Tue, 31 Mar 2020 21:16:57 -0500 Subject: [PATCH 14/30] Working on cache alignment fix. WIP. --- P-CLHT/example.cpp | 3 +- P-CLHT/include/clht_lb_res.h | 11 +++++- P-CLHT/src/clht_gc.c | 17 +++++---- P-CLHT/src/clht_lb_res.c | 71 ++++++++++++++++++++---------------- P-CLHT/test.cpp | 3 +- 5 files changed, 60 insertions(+), 45 deletions(-) diff --git a/P-CLHT/example.cpp b/P-CLHT/example.cpp index a6a7806a..8f028765 100644 --- a/P-CLHT/example.cpp +++ b/P-CLHT/example.cpp @@ -117,8 +117,7 @@ void run(char **argv) { barrier_cross(&barrier); for (uint64_t i = start_key; i < end_key; i++) { - PMEMoid pmem_ht = (PMEMoid)((tds[thread_id].ht)->ht); - clht_hashtable_t *ht = (clht_hashtable_t*)pmemobj_direct(pmem_ht); + clht_hashtable_t *ht = (clht_hashtable_t*)clht_ptr_from_off((tds[thread_id].ht)->ht_off); uintptr_t val = clht_get(ht, keys[i]); if (val != keys[i]) { std::cout << "[CLHT] wrong key read: " << val << "expected: " << keys[i] << std::endl; diff --git a/P-CLHT/include/clht_lb_res.h b/P-CLHT/include/clht_lb_res.h index 6468a5cf..99ed062d 100644 --- a/P-CLHT/include/clht_lb_res.h +++ b/P-CLHT/include/clht_lb_res.h @@ -175,7 +175,8 @@ typedef struct ALIGNED(CACHE_LINE_SIZE) clht { struct { - PMEMoid ht; + // PMEMoid ht; + uint64_t ht_off; // struct clht_hashtable_s* ht; uint8_t next_cache_line[CACHE_LINE_SIZE - (sizeof(void*))]; // Prob need to add TOID to this as well @@ -197,7 +198,8 @@ typedef struct ALIGNED(CACHE_LINE_SIZE) clht_hashtable_s struct { size_t num_buckets; - PMEMoid table; + // PMEMoid table; + uint64_t table_off; //bucket_t* table; size_t hash; size_t version; @@ -412,6 +414,7 @@ clht_hashtable_t* clht_hashtable_create(uint64_t num_buckets); clht_t* clht_create(uint64_t num_buckets); clht_t* clht_open(); + /* Insert a key-value pair into a hashtable. 
*/ int clht_put(clht_t* hashtable, clht_addr_t key, clht_val_t val); @@ -455,6 +458,10 @@ POBJ_LAYOUT_TOID(clht, clht_hashtable_t); POBJ_LAYOUT_TOID(clht, bucket_t); POBJ_LAYOUT_END(clht); +/* Global pool uuid */ +uint64_t pool_uuid; +void* clht_ptr_from_off(uint64_t offset); + #ifdef __cplusplus } #endif diff --git a/P-CLHT/src/clht_gc.c b/P-CLHT/src/clht_gc.c index 0b62017e..a34a6dcd 100644 --- a/P-CLHT/src/clht_gc.c +++ b/P-CLHT/src/clht_gc.c @@ -47,7 +47,7 @@ clht_gc_thread_init(clht_t* h, int id) ht_ts_t* ts = (ht_ts_t*) memalign(CACHE_LINE_SIZE, sizeof(ht_ts_t)); assert(ts != NULL); - clht_hashtable_t* ht_ptr = pmemobj_direct(h->ht); + clht_hashtable_t* ht_ptr = clht_ptr_from_off(h->ht_off); ts->version = ht_ptr->version; ts->id = id; @@ -129,7 +129,7 @@ clht_gc_min_version_used(clht_t* h) { volatile ht_ts_t* cur = h->version_list; - clht_hashtable_t* ht_ptr = pmemobj_direct(h->ht); + clht_hashtable_t* ht_ptr = clht_ptr_from_off(h->ht_off); size_t min = ht_ptr->version; while (cur != NULL) { @@ -151,7 +151,7 @@ clht_gc_min_version_used(clht_t* h) static int clht_gc_collect_cond(clht_t* hashtable, int collect_not_referenced_only) { - clht_hashtable_t* ht_ptr = pmemobj_direct(hashtable->ht); + clht_hashtable_t* ht_ptr = clht_ptr_from_off(hashtable->ht_off); /* if version_min >= current version there is nothing to collect! */ if ((hashtable->version_min >= ht_ptr->version) || TRYLOCK_ACQ(&hashtable->gc_lock)) { @@ -224,7 +224,7 @@ clht_gc_free(clht_hashtable_t* hashtable) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin; bucket = pmemobj_direct(bucket->next); while (bucket != NULL) @@ -237,7 +237,8 @@ clht_gc_free(clht_hashtable_t* hashtable) } #endif - pmemobj_free(&(hashtable->table)); + PMEMoid table_oid = {pool_uuid, hashtable->table_off}; + pmemobj_free(&(table_oid)); PMEMoid ht_oid = pmemobj_oid((void*) hashtable); pmemobj_free(&ht_oid); @@ -252,7 +253,7 @@ clht_gc_destroy(clht_t* hashtable) { #if !defined(CLHT_LINKED) clht_gc_collect_all(hashtable); - clht_gc_free(pmemobj_direct(hashtable->ht)); + clht_gc_free(clht_ptr_from_off(hashtable->ht_off)); // PMEMoid ht_oid = pmemobj_oid((void*) hashtable); // pmemobj_free(&ht_oid); #endif @@ -277,7 +278,7 @@ clht_gc_release(clht_hashtable_t* hashtable) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin; bucket = pmemobj_direct(bucket->next); while (bucket != NULL) @@ -291,7 +292,7 @@ clht_gc_release(clht_hashtable_t* hashtable) } #endif - ssmem_release(clht_alloc, pmemobj_direct(hashtable->table)); + ssmem_release(clht_alloc, clht_ptr_from_off(hashtable->table_off)); ssmem_release(clht_alloc, hashtable); // pmemobj_free(&(hashtable->table)); // PMEMoid ht_oid = pmemobj_oid((void*) hashtable); diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 18d7566c..24e7c8ad 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -180,6 +180,12 @@ static inline void clflush_next_check(char *data, int len, bool fence) mfence(); } +void* clht_ptr_from_off(uint64_t offset) +{ + PMEMoid oid = {pool_uuid, offset}; + return pmemobj_direct(oid); +} + static int bucket_init(PMEMobjpool *pop_arg, void *ptr, void *arg) { bucket_t* bucket = ptr; @@ -251,6 +257,7 @@ clht_t* clht_open() { { perror("root pointer is null\n"); } + pool_uuid = 
my_root.pool_uuid_lo; clht_t* w = pmemobj_direct(my_root); printf("my_root.off: %d\n", my_root.off); @@ -282,6 +289,7 @@ clht_create(uint64_t num_buckets) { perror("root pointer is null\n"); } + pool_uuid = my_root.pool_uuid_lo; clht_t* w = pmemobj_direct(my_root); printf("my_root.off: %d\n", my_root.off); @@ -293,12 +301,12 @@ clht_create(uint64_t num_buckets) } clht_hashtable_t* ht_ptr = clht_hashtable_create(num_buckets);; - printf("clht_create ht_ptr->table.off: %d\n", ht_ptr->table.off); + printf("clht_create ht_ptr->table.off: %d\n", ht_ptr->table_off); // Transactional allocation TX_BEGIN(pop) { //pmemobj_tx_abort(-1); - w->ht = pmemobj_oid(ht_ptr); + w->ht_off = pmemobj_oid(ht_ptr).off; } TX_ONABORT { printf("Failed clht_hashtable_create, rolling back\n"); } TX_END; @@ -316,7 +324,7 @@ clht_create(uint64_t num_buckets) w->ht_oldest = ht_ptr; // This should flush everything to persistent memory - clflush((char *)pmemobj_direct(ht_ptr->table), num_buckets * sizeof(bucket_t), true); + clflush((char *)clht_ptr_from_off(ht_ptr->table_off), num_buckets * sizeof(bucket_t), true); clflush((char *)ht_ptr, sizeof(clht_hashtable_t), true); clflush((char *)w, sizeof(clht_t), true); @@ -352,12 +360,12 @@ clht_hashtable_create(uint64_t num_buckets) /* hashtable->table = calloc(num_buckets, (sizeof(bucket_t))); */ // hashtable->table = (bucket_t*) memalign(CACHE_LINE_SIZE, num_buckets * (sizeof(bucket_t))); PMEMoid table_oid; - hashtable->table = table_oid; - if (pmemobj_alloc(pop, &hashtable->table, num_buckets * sizeof(bucket_t), 0, 0, 0)) { + if (pmemobj_alloc(pop, &table_oid, num_buckets * sizeof(bucket_t), 0, 0, 0)) { fprintf(stderr, "pmemobj_alloc failed for table_oid in clht_hashtable_create\n"); assert(0); } - bucket_t* bucket_ptr = pmemobj_direct(hashtable->table); + hashtable->table_off = table_oid.off; + bucket_t* bucket_ptr = clht_ptr_from_off(hashtable->table_off); if (bucket_ptr == NULL) { @@ -418,7 +426,7 @@ clht_get(clht_hashtable_t* hashtable, clht_addr_t key) { size_t bin = clht_hash(hashtable, key); CLHT_GC_HT_VERSION_USED(hashtable); - volatile bucket_t* bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + volatile bucket_t* bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin; uint32_t j; do @@ -470,9 +478,9 @@ bucket_exists(volatile bucket_t* bucket, clht_addr_t key) int clht_put(clht_t* h, clht_addr_t key, clht_val_t val) { - clht_hashtable_t* hashtable = pmemobj_direct(h->ht); + clht_hashtable_t* hashtable = clht_ptr_from_off(h->ht_off); size_t bin = clht_hash(hashtable, key); - volatile bucket_t* bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + volatile bucket_t* bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin; #if CLHT_READ_ONLY_FAIL == 1 if (bucket_exists(bucket, key)) @@ -484,10 +492,10 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) clht_lock_t* lock = &bucket->lock; while (!LOCK_ACQ(lock, hashtable)) { - hashtable = pmemobj_direct(h->ht); + hashtable = clht_ptr_from_off(h->ht_off); size_t bin = clht_hash(hashtable, key); - bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin; lock = &bucket->lock; } @@ -512,7 +520,7 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) empty_v = &bucket->val[j]; } } - + int resize = 0; if (likely(pmemobj_direct(bucket->next) == NULL)) { @@ -557,7 +565,6 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) /* ht_resize_pes(h, 1); */ DEBUG_PRINT("Calling ht_status for key %ld\n", 
(long)key); int ret = ht_status(h, 1, 0); - // if crash, return true, because the insert anyway succeeded if (ret == 0) return true; @@ -567,6 +574,7 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) bucket = pmemobj_direct(bucket->next); } while (true); + } @@ -574,9 +582,9 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) clht_val_t clht_remove(clht_t* h, clht_addr_t key) { - clht_hashtable_t* hashtable = pmemobj_direct(h->ht); + clht_hashtable_t* hashtable = clht_ptr_from_off(h->ht_off); size_t bin = clht_hash(hashtable, key); - volatile bucket_t* bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + volatile bucket_t* bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin; #if CLHT_READ_ONLY_FAIL == 1 if (!bucket_exists(bucket, key)) @@ -588,10 +596,10 @@ clht_remove(clht_t* h, clht_addr_t key) clht_lock_t* lock = &bucket->lock; while (!LOCK_ACQ(lock, hashtable)) { - hashtable = pmemobj_direct(h->ht); + hashtable = clht_ptr_from_off(h->ht_off); size_t bin = clht_hash(hashtable, key); - bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin; lock = &bucket->lock; } @@ -625,7 +633,7 @@ clht_remove(clht_t* h, clht_addr_t key) static uint32_t clht_put_seq(clht_hashtable_t* hashtable, clht_addr_t key, clht_val_t val, uint64_t bin) { - volatile bucket_t* bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + volatile bucket_t* bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin; uint32_t j; do @@ -731,7 +739,7 @@ ht_resize_help(clht_hashtable_t* h) /* hash = num_buckets - 1 */ for (b = h->hash; b >= 0; b--) { - bucket_t* bu_cur = ((bucket_t*)pmemobj_direct(h->table)) + b; + bucket_t* bu_cur = ((bucket_t*)clht_ptr_from_off(h->table_off)) + b; if (!bucket_cpy((clht_t *)h, bu_cur, h->table_tmp)) { /* reached a point where the resizer is handling */ /* printf("[GC-%02d] helped #buckets: %10zu = %5.1f%%\n", */ @@ -752,7 +760,7 @@ ht_resize_pes(clht_t* h, int is_increase, int by) check_ht_status_steps = CLHT_STATUS_INVOK; - clht_hashtable_t* ht_old = pmemobj_direct(h->ht); + clht_hashtable_t* ht_old = clht_ptr_from_off(h->ht_off); if (TRYLOCK_ACQ(&h->resize_lock)) { @@ -784,7 +792,7 @@ ht_resize_pes(clht_t* h, int is_increase, int by) size_t b; for (b = 0; b < ht_old->num_buckets; b++) { - bucket_t* bu_cur = (bucket_t*)(pmemobj_direct(ht_old->table)) + b; + bucket_t* bu_cur = (bucket_t*)(clht_ptr_from_off(ht_old->table_off)) + b; int ret = bucket_cpy(h, bu_cur, ht_new); /* reached a point where the helper is handling */ if (ret == -1) return -1; @@ -806,7 +814,7 @@ ht_resize_pes(clht_t* h, int is_increase, int by) size_t b; for (b = 0; b < ht_old->num_buckets; b++) { - bucket_t* bu_cur = (bucket_t*)(pmemobj_direct(ht_old->table)) + b; + bucket_t* bu_cur = (bucket_t*)(clht_ptr_from_off(ht_old->table_off)) + b; int ret = bucket_cpy(h, bu_cur, ht_new); if (ret == -1) return -1; @@ -832,7 +840,7 @@ ht_resize_pes(clht_t* h, int is_increase, int by) mfence(); clflush((char *)ht_new, sizeof(clht_hashtable_t), false); - clflush_next_check((char *)pmemobj_direct(ht_new->table), num_buckets_new * sizeof(bucket_t), false); + clflush_next_check((char *)clht_ptr_from_off(ht_new->table_off), num_buckets_new * sizeof(bucket_t), false); mfence(); #if defined(CRASH_BEFORE_SWAP_CLHT) @@ -869,7 +877,8 @@ ht_resize_pes(clht_t* h, int is_increase, int by) // are there any race conditions? 
PMEMoid ht_new_oid = pmemobj_oid(ht_new); uint64_t ht_new_oid_off = ht_new_oid.off; - uint64_t h_new = (uint64_t)h + sizeof(uint64_t); + // uint64_t h_new = (uint64_t)h + sizeof(uint64_t); + uint64_t* h_new = &(h->ht_off); //SWAP_U64((uint64_t*) h, (uint64_t) ht_new); SWAP_U64((uint64_t*)h_new, ht_new_oid_off); @@ -934,6 +943,7 @@ ht_resize_pes(clht_t* h, int is_increase, int by) { ht_status(h, 1, 0); } + return 1; } @@ -947,7 +957,7 @@ clht_size(clht_hashtable_t* hashtable) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin; uint32_t j; do @@ -976,8 +986,7 @@ ht_status(clht_t* h, int resize_increase, int just_print) { return 0; } - - clht_hashtable_t* hashtable = pmemobj_direct(h->ht); + clht_hashtable_t* hashtable = clht_ptr_from_off(h->ht_off); uint64_t num_buckets = hashtable->num_buckets; volatile bucket_t* bucket = NULL; size_t size = 0; @@ -987,7 +996,7 @@ ht_status(clht_t* h, int resize_increase, int just_print) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin; int expands_cont = -1; expands--; @@ -1112,7 +1121,7 @@ clht_print(clht_hashtable_t* hashtable) uint64_t bin; for (bin = 0; bin < num_buckets; bin++) { - bucket = ((bucket_t*)pmemobj_direct(hashtable->table)) + bin; + bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin; printf("[[%05zu]] ", bin); @@ -1139,7 +1148,7 @@ clht_print(clht_hashtable_t* hashtable) void clht_lock_initialization(clht_t *h) { DEBUG_PRINT("Performing Lock initialization\n"); - clht_hashtable_t *ht = pmemobj_direct(h->ht); + clht_hashtable_t *ht = clht_ptr_from_off(h->ht_off); volatile bucket_t *next; h->resize_lock = LOCK_FREE; @@ -1148,7 +1157,7 @@ void clht_lock_initialization(clht_t *h) int i; for (i = 0; i < ht->num_buckets; i++) { - bucket_t* temp = pmemobj_direct(ht->table); + bucket_t* temp = clht_ptr_from_off(ht->table_off); temp[i].lock = LOCK_FREE; for (next = pmemobj_direct(temp[i].next); next != NULL; next = pmemobj_direct(next->next)) { next->lock = LOCK_FREE; diff --git a/P-CLHT/test.cpp b/P-CLHT/test.cpp index 89d3d116..fd0947f5 100644 --- a/P-CLHT/test.cpp +++ b/P-CLHT/test.cpp @@ -118,8 +118,7 @@ void run(char **argv) { barrier_cross(&barrier); for (uint64_t i = start_key; i < end_key; i++) { - PMEMoid pmem_ht = (PMEMoid)((tds[thread_id].ht)->ht); - clht_hashtable_t *ht = (clht_hashtable_t*)pmemobj_direct(pmem_ht); + clht_hashtable_t *ht = (clht_hashtable_t*)clht_ptr_from_off((tds[thread_id].ht)->ht_off); uintptr_t val = clht_get(ht, keys[i]); if (val != keys[i]) { std::cout << "[CLHT] wrong key read: " << val << "expected: " << keys[i] << std::endl; From 9b605d907c90b8ee17d20c9d199089ff7bc8f65f Mon Sep 17 00:00:00 2001 From: pyrito Date: Tue, 31 Mar 2020 22:51:57 -0500 Subject: [PATCH 15/30] Cache-line alignment fixed. 
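What "cache-line alignment fixed" amounts to: a PMEMoid is 16 bytes, so embedding one in bucket_t and clht_t pushed those structures past their intended cache-line layout; this patch stores only the 8-byte pool offset and rebuilds the PMEMoid on access from a single global pool_uuid. Below is a minimal sketch of that pattern, using the helper name from the patch but an illustrative struct rather than the real bucket_t.

```
/* Sketch of the offset + pool_uuid pattern adopted in this patch.
 * pool_uuid is captured once from the root PMEMoid; persistent structures
 * store only 8-byte offsets. */
#include <libpmemobj.h>

uint64_t pool_uuid;                    /* set from pmemobj_root(...).pool_uuid_lo */

static void *clht_ptr_from_off(uint64_t offset)
{
    PMEMoid oid = {pool_uuid, offset}; /* rebuild the full object id */
    return pmemobj_direct(oid);        /* NULL for a zero offset, matching the
                                          NULL checks in the converted code */
}

struct node {                          /* illustrative only, not the real bucket_t */
    uint64_t key;
    uint64_t next_off;                 /* 8 bytes instead of a 16-byte PMEMoid */
};

static void link_after(PMEMobjpool *pop, struct node *n, PMEMoid new_oid)
{
    n->next_off = new_oid.off;         /* store just the pool offset */
    pmemobj_persist(pop, &n->next_off, sizeof(n->next_off));
}
```

Since pool_uuid lives in volatile memory, it has to be re-read from the root PMEMoid (as clht_open and clht_create do) every time the pool is reopened.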
--- P-CLHT/include/clht_lb_res.h | 3 ++- P-CLHT/src/clht_gc.c | 8 ++++---- P-CLHT/src/clht_lb_res.c | 38 ++++++++++++++++++------------------ 3 files changed, 25 insertions(+), 24 deletions(-) diff --git a/P-CLHT/include/clht_lb_res.h b/P-CLHT/include/clht_lb_res.h index 99ed062d..3836da9b 100644 --- a/P-CLHT/include/clht_lb_res.h +++ b/P-CLHT/include/clht_lb_res.h @@ -162,7 +162,8 @@ typedef struct ALIGNED(CACHE_LINE_SIZE) bucket_s clht_addr_t key[ENTRIES_PER_BUCKET]; clht_val_t val[ENTRIES_PER_BUCKET]; // volatile struct bucket_s* next; - PMEMoid next; + // PMEMoid next; + uint64_t next_off; } bucket_t; //#if __GNUC__ > 4 && __GNUC_MINOR__ > 4 diff --git a/P-CLHT/src/clht_gc.c b/P-CLHT/src/clht_gc.c index a34a6dcd..55246862 100644 --- a/P-CLHT/src/clht_gc.c +++ b/P-CLHT/src/clht_gc.c @@ -225,12 +225,12 @@ clht_gc_free(clht_hashtable_t* hashtable) for (bin = 0; bin < num_buckets; bin++) { bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin; - bucket = pmemobj_direct(bucket->next); + bucket = clht_ptr_from_off(bucket->next_off); while (bucket != NULL) { volatile bucket_t* cur = bucket; - bucket = pmemobj_direct(bucket->next); + bucket = clht_ptr_from_off(bucket->next_off); PMEMoid cur_oid = pmemobj_oid((void*) cur); pmemobj_free(&cur_oid); } @@ -279,12 +279,12 @@ clht_gc_release(clht_hashtable_t* hashtable) for (bin = 0; bin < num_buckets; bin++) { bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin; - bucket = pmemobj_direct(bucket->next); + bucket = clht_ptr_from_off(bucket->next_off); while (bucket != NULL) { volatile bucket_t* cur = bucket; - bucket = pmemobj_direct(bucket->next); + bucket = clht_ptr_from_off(bucket->next_off); ssmem_release(clht_alloc, (void*) cur); // PMEMoid cur_oid = pmemobj_oid((void*) cur); // pmemobj_free(&cur_oid); diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 24e7c8ad..67426100 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -172,8 +172,8 @@ static inline void clflush_next_check(char *data, int len, bool fence) #elif CLWB asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)(ptr))); #endif - if (pmemobj_direct( ((bucket_t *)data)->next ) ) - clflush_next_check((char *)pmemobj_direct( ((bucket_t *)data)->next ), sizeof(bucket_t), false); + if (clht_ptr_from_off( ((bucket_t *)data)->next_off ) ) + clflush_next_check((char *)clht_ptr_from_off( ((bucket_t *)data)->next_off ), sizeof(bucket_t), false); while(read_tsc() < etsc) cpu_pause(); } if (fence) @@ -196,7 +196,7 @@ static int bucket_init(PMEMobjpool *pop_arg, void *ptr, void *arg) { bucket->key[j] = 0; } - bucket->next = OID_NULL; + bucket->next_off = OID_NULL.off; return 0; } @@ -449,7 +449,7 @@ clht_get(clht_hashtable_t* hashtable, clht_addr_t key) } } } - bucket = pmemobj_direct(bucket->next); + bucket = clht_ptr_from_off(bucket->next_off); } while (unlikely(bucket != NULL)); return 0; @@ -468,7 +468,7 @@ bucket_exists(volatile bucket_t* bucket, clht_addr_t key) return true; } } - bucket = pmemobj_direct(bucket->next); + bucket = clht_ptr_from_off(bucket->next_off); } while (unlikely(bucket != NULL)); return false; @@ -522,7 +522,7 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) } int resize = 0; - if (likely(pmemobj_direct(bucket->next) == NULL)) + if (likely(clht_ptr_from_off(bucket->next_off) == NULL)) { if (unlikely(empty == NULL)) { @@ -541,11 +541,11 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) #endif } TX_FINALLY { clflush((char *)b, sizeof(bucket_t), true); - bucket->next = 
pmemobj_oid(b); + bucket->next_off = pmemobj_oid(b).off; } TX_ONABORT { printf("Failed clht_put, rolling back\n"); } TX_END - bucket_t* next_ptr = pmemobj_direct(bucket->next); + bucket_t* next_ptr = clht_ptr_from_off(bucket->next_off); clflush((char *)&next_ptr, sizeof(uintptr_t), true); } else @@ -571,7 +571,7 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) } return true; } - bucket = pmemobj_direct(bucket->next); + bucket = clht_ptr_from_off(bucket->next_off); } while (true); @@ -623,7 +623,7 @@ clht_remove(clht_t* h, clht_addr_t key) return val; } } - bucket = pmemobj_direct(bucket->next); + bucket = clht_ptr_from_off(bucket->next_off); } while (unlikely(bucket != NULL)); LOCK_RLS(lock); @@ -650,12 +650,12 @@ clht_put_seq(clht_hashtable_t* hashtable, clht_addr_t key, clht_val_t val, uint6 } } - if (pmemobj_direct(bucket->next) == NULL) + if (clht_ptr_from_off(bucket->next_off) == NULL) { DPP(put_num_failed_expand); int null; - bucket->next = pmemobj_oid(clht_bucket_create()); - bucket_t* bucket_ptr = pmemobj_direct(bucket->next); + bucket->next_off = pmemobj_oid(clht_bucket_create()).off; + bucket_t* bucket_ptr = clht_ptr_from_off(bucket->next_off); TX_BEGIN(pop) { bucket_ptr->val[0] = val; bucket_ptr->key[0] = key; @@ -665,7 +665,7 @@ clht_put_seq(clht_hashtable_t* hashtable, clht_addr_t key, clht_val_t val, uint6 return true; } - bucket = pmemobj_direct(bucket->next); + bucket = clht_ptr_from_off(bucket->next_off); } while (true); } @@ -719,7 +719,7 @@ bucket_cpy(clht_t* h, volatile bucket_t* bucket, clht_hashtable_t* ht_new) clht_put_seq(ht_new, key, bucket->val[j], bin); } } - bucket = pmemobj_direct(bucket->next); + bucket = clht_ptr_from_off(bucket->next_off); } while (bucket != NULL); @@ -970,7 +970,7 @@ clht_size(clht_hashtable_t* hashtable) } } - bucket = pmemobj_direct(bucket->next); + bucket = clht_ptr_from_off(bucket->next_off); } while (bucket != NULL); } @@ -1013,7 +1013,7 @@ ht_status(clht_t* h, int resize_increase, int just_print) } } - bucket = pmemobj_direct(bucket->next); + bucket = clht_ptr_from_off(bucket->next_off); } while (bucket != NULL); @@ -1136,7 +1136,7 @@ clht_print(clht_hashtable_t* hashtable) } } - bucket = pmemobj_direct(bucket->next); + bucket = clht_ptr_from_off(bucket->next_off); printf(" ** -> "); } while (bucket != NULL); @@ -1159,7 +1159,7 @@ void clht_lock_initialization(clht_t *h) for (i = 0; i < ht->num_buckets; i++) { bucket_t* temp = clht_ptr_from_off(ht->table_off); temp[i].lock = LOCK_FREE; - for (next = pmemobj_direct(temp[i].next); next != NULL; next = pmemobj_direct(next->next)) { + for (next = clht_ptr_from_off(temp[i].next_off); next != NULL; next = clht_ptr_from_off(next->next_off)) { next->lock = LOCK_FREE; } } From 5dda4968947aaa46e5fb0e52655b892ec4593c1b Mon Sep 17 00:00:00 2001 From: pyrito Date: Sun, 5 Apr 2020 13:54:03 -0500 Subject: [PATCH 16/30] Added README information for PMDK Edited the main README to refer to the PMDK branch in the "limitations" section. Created a PMDK README for P-CLHT. --- P-CLHT/pmdk.md | 46 ++++++++++++++++++++++++++++++++++++++++ P-CLHT/src/clht_lb_res.c | 19 +++++++++++------ README.md | 2 +- 3 files changed, 59 insertions(+), 8 deletions(-) create mode 100644 P-CLHT/pmdk.md diff --git a/P-CLHT/pmdk.md b/P-CLHT/pmdk.md new file mode 100644 index 00000000..4a19f2bb --- /dev/null +++ b/P-CLHT/pmdk.md @@ -0,0 +1,46 @@ +## P-CLHT: Persistent Cache-Line Hash Table - PMDK + +This branch of P-CLHT uses PMDK to ensure the persistence and recoverability of the persistent cache-line hash table. 
All other details of this data structure are the same (cache line flushing, alignment, etc) except for the backend library used to ensure persistence. + +**Motivation** The current implementation does not have a way of recovering permanent memory leaks during a crash. The PMDK library, specifically `libpmemobj`, gives us useful internal structures such as `pmemobj_root`, which is a stored offset within the persistent memory pool that can be used to recover any data that was left in a partial state, etc. + +**How We Used PMDK** The entire conversion required us to replace any data structure pointers to point to the persistent memory pool using the non-transactional, atomic allocation functions such as `pmemobj_alloc`. Since the `PMEMoid` structs (which store the pool offset and id) were 16 bytes, some code manipulation was required to ensure the cache-line alignment of the data structure. Finally, transactions were used for major hashtable operations such as insertion, resizing, and deletion. This part is still being tested and is a work-in-progress. If you look through the code and compare it with the `master` branch, you can see that the changes follow a logical pattern, and the modifications are relatively minor. + +**How to test recoverability?** The best way to recover your hashtable is following the paradigm presented in `clht_open` where all the user has to do is use `pmemobj_root` to recover the root (a clht_t object basically) of the persistent memory pool. Please make sure that you are opening the same pool with the correct pool layout! + +## Build & Run +### How to enable PM? +1. Install PMDK +```$ git clone https://github.com/pmem/pmdk.git +$ cd pmdk +$ git checkout tags/1.6 +$ make -j +$ cd .. +``` +2. Emulate PM with Ext4-DAX mount +```$ sudo mkfs.ext4 -b 4096 -E stride=512 -F /dev/pmem0 +$ sudo mount -o dax /dev/pmem0 /mnt/pmem +``` + +3. Set pool_size and pool name in clht_lb_res.c. TODO: instructions to set up environment variables instead. + +4. Make accordingly and run the example. + +#### Build + +``` +$ mkdir build +$ cd build +$ cmake .. 
+$ make -j +``` + +#### Run + +``` +$ ./example 10000 4 + +usage: ./example [n] [nthreads] +n: number of keys (integer) +nthreads: number of threads (integer) +``` diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 67426100..12c2e648 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -316,12 +316,15 @@ clht_create(uint64_t num_buckets) free(w); return NULL; } - w->resize_lock = LOCK_FREE; - w->gc_lock = LOCK_FREE; - w->status_lock = LOCK_FREE; - w->version_list = NULL; - w->version_min = 0; - w->ht_oldest = ht_ptr; + + TX_BEGIN(pop) { + w->resize_lock = LOCK_FREE; + w->gc_lock = LOCK_FREE; + w->status_lock = LOCK_FREE; + w->version_list = NULL; + w->version_min = 0; + w->ht_oldest = ht_ptr; + } TX_END // This should flush everything to persistent memory clflush((char *)clht_ptr_from_off(ht_ptr->table_off), num_buckets * sizeof(bucket_t), true); @@ -364,7 +367,9 @@ clht_hashtable_create(uint64_t num_buckets) fprintf(stderr, "pmemobj_alloc failed for table_oid in clht_hashtable_create\n"); assert(0); } - hashtable->table_off = table_oid.off; + TX_BEGIN(pop) { + hashtable->table_off = table_oid.off; + } TX_END bucket_t* bucket_ptr = clht_ptr_from_off(hashtable->table_off); if (bucket_ptr == NULL) diff --git a/README.md b/README.md index 7bdd853d..2d1c1768 100644 --- a/README.md +++ b/README.md @@ -166,7 +166,7 @@ For artifact evaluation, we will evaluate again the performance of the index str Just for performance testing on real PM, you can use [libvmmalloc](http://pmem.io/pmdk/manpages/linux/v1.3/libvmmalloc.3.html), which transparently converts all the dynamic memory allocations into Persistent Memory allocations. However, if you want to apply RECIPE indexes into your real PM application, you would need to change current volatile -memory allocators using [libpmem](https://pmem.io/pmdk/) APIs. +memory allocators using [libpmem](https://pmem.io/pmdk/) APIs. Currently, a version of RECIPE is being developed using the PMDK library (check the "pmdk" branch!) 2. Current implementations only ensure the lowest level of isolation (Read Uncommitted) when using them for transactional systems, since they are based on normal CASs and temporal stores coupled with cache line flush instructions. 
However, you may extend them From bbdd25007c9e82ca32e53de8f33938bc0eaaeaf6 Mon Sep 17 00:00:00 2001 From: pyrito Date: Sun, 5 Apr 2020 23:28:32 -0500 Subject: [PATCH 17/30] Added transaction configurability --- P-CLHT/example.cpp | 2 +- P-CLHT/include/clht_lb_res.h | 1 - P-CLHT/src/clht_lb_res.c | 121 ++++++++++++++++++++++++----------- 3 files changed, 84 insertions(+), 40 deletions(-) diff --git a/P-CLHT/example.cpp b/P-CLHT/example.cpp index 8f028765..ac4c0a05 100644 --- a/P-CLHT/example.cpp +++ b/P-CLHT/example.cpp @@ -120,7 +120,7 @@ void run(char **argv) { clht_hashtable_t *ht = (clht_hashtable_t*)clht_ptr_from_off((tds[thread_id].ht)->ht_off); uintptr_t val = clht_get(ht, keys[i]); if (val != keys[i]) { - std::cout << "[CLHT] wrong key read: " << val << "expected: " << keys[i] << std::endl; + std::cout << "[CLHT] wrong key read: " << val << " expected: " << keys[i] << std::endl; exit(1); } } diff --git a/P-CLHT/include/clht_lb_res.h b/P-CLHT/include/clht_lb_res.h index 3836da9b..b134eb5a 100644 --- a/P-CLHT/include/clht_lb_res.h +++ b/P-CLHT/include/clht_lb_res.h @@ -180,7 +180,6 @@ typedef struct ALIGNED(CACHE_LINE_SIZE) clht uint64_t ht_off; // struct clht_hashtable_s* ht; uint8_t next_cache_line[CACHE_LINE_SIZE - (sizeof(void*))]; - // Prob need to add TOID to this as well struct clht_hashtable_s* ht_oldest; struct ht_ts* version_list; size_t version_min; diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 12c2e648..2e99a3d7 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -81,6 +81,7 @@ __thread size_t check_ht_status_steps = CLHT_STATUS_INVOK_IN; #endif */ +#define PMDK_TRANSACTION 1 const char* clht_type_desc() @@ -300,16 +301,23 @@ clht_create(uint64_t num_buckets) return NULL; } - clht_hashtable_t* ht_ptr = clht_hashtable_create(num_buckets);; - printf("clht_create ht_ptr->table.off: %d\n", ht_ptr->table_off); + clht_hashtable_t* ht_ptr; // Transactional allocation +#if PMDK_TRANSACTION TX_BEGIN(pop) { + ht_ptr= clht_hashtable_create(num_buckets); + printf("clht_create ht_ptr->table.off: %d\n", ht_ptr->table_off); //pmemobj_tx_abort(-1); w->ht_off = pmemobj_oid(ht_ptr).off; } TX_ONABORT { printf("Failed clht_hashtable_create, rolling back\n"); } TX_END; +#else + ht_ptr= clht_hashtable_create(num_buckets); + printf("clht_create ht_ptr->table.off: %d\n", ht_ptr->table_off); + w->ht_off = pmemobj_oid(ht_ptr).off; +#endif if (ht_ptr == NULL) { @@ -317,14 +325,12 @@ clht_create(uint64_t num_buckets) return NULL; } - TX_BEGIN(pop) { - w->resize_lock = LOCK_FREE; - w->gc_lock = LOCK_FREE; - w->status_lock = LOCK_FREE; - w->version_list = NULL; - w->version_min = 0; - w->ht_oldest = ht_ptr; - } TX_END + w->resize_lock = LOCK_FREE; + w->gc_lock = LOCK_FREE; + w->status_lock = LOCK_FREE; + w->version_list = NULL; + w->version_min = 0; + w->ht_oldest = ht_ptr; // This should flush everything to persistent memory clflush((char *)clht_ptr_from_off(ht_ptr->table_off), num_buckets * sizeof(bucket_t), true); @@ -367,9 +373,9 @@ clht_hashtable_create(uint64_t num_buckets) fprintf(stderr, "pmemobj_alloc failed for table_oid in clht_hashtable_create\n"); assert(0); } - TX_BEGIN(pop) { - hashtable->table_off = table_oid.off; - } TX_END + + hashtable->table_off = table_oid.off; + bucket_t* bucket_ptr = clht_ptr_from_off(hashtable->table_off); if (bucket_ptr == NULL) @@ -381,33 +387,32 @@ clht_hashtable_create(uint64_t num_buckets) memset(bucket_ptr, 0, num_buckets * (sizeof(bucket_t))); - TX_BEGIN(pop) { - uint64_t i; - for (i = 0; i < num_buckets; i++) 
+ uint64_t i; + for (i = 0; i < num_buckets; i++) + { + bucket_ptr[i].lock = LOCK_FREE; + uint32_t j; + for (j = 0; j < ENTRIES_PER_BUCKET; j++) { - bucket_ptr[i].lock = LOCK_FREE; - uint32_t j; - for (j = 0; j < ENTRIES_PER_BUCKET; j++) - { - bucket_ptr[i].key[j] = 0; - } + bucket_ptr[i].key[j] = 0; } + } + + hashtable->num_buckets = num_buckets; + hashtable->hash = num_buckets - 1; + hashtable->version = 0; + hashtable->table_tmp = NULL; + hashtable->table_new = NULL; + hashtable->table_prev = NULL; + hashtable->num_expands = 0; + hashtable->num_expands_threshold = (CLHT_PERC_EXPANSIONS * num_buckets); + if (hashtable->num_expands_threshold == 0) + { + hashtable->num_expands_threshold = 1; + } + hashtable->is_helper = 1; + hashtable->helper_done = 0; - hashtable->num_buckets = num_buckets; - hashtable->hash = num_buckets - 1; - hashtable->version = 0; - hashtable->table_tmp = NULL; - hashtable->table_new = NULL; - hashtable->table_prev = NULL; - hashtable->num_expands = 0; - hashtable->num_expands_threshold = (CLHT_PERC_EXPANSIONS * num_buckets); - if (hashtable->num_expands_threshold == 0) - { - hashtable->num_expands_threshold = 1; - } - hashtable->is_helper = 1; - hashtable->helper_done = 0; - } TX_END return hashtable; } @@ -532,8 +537,10 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) if (unlikely(empty == NULL)) { DPP(put_num_failed_expand); - bucket_t* b = clht_bucket_create_stats(hashtable, &resize);; + bucket_t* b; +#if PMDK_TRANSACTION TX_BEGIN(pop) { + b = clht_bucket_create_stats(hashtable, &resize); b->val[0] = val; #ifdef __tile__ /* keep the writes in order */ @@ -550,8 +557,23 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) } TX_ONABORT { printf("Failed clht_put, rolling back\n"); } TX_END +#else + b = clht_bucket_create_stats(hashtable, &resize); + b->val[0] = val; +#ifdef __tile__ + /* keep the writes in order */ + _mm_sfence(); +#endif + b->key[0] = key; +#ifdef __tile__ + /* make sure they are visible */ + mm_sfence(); +#endif + clflush((char *)b, sizeof(bucket_t), true); + bucket->next_off = pmemobj_oid(b).off; bucket_t* next_ptr = clht_ptr_from_off(bucket->next_off); clflush((char *)&next_ptr, sizeof(uintptr_t), true); +#endif } else { @@ -620,10 +642,15 @@ clht_remove(clht_t* h, clht_addr_t key) { clht_val_t val = bucket->val[j]; // May not need this, if there is a crash, remove will not be persisted +#if PMDK_TRANSACTION TX_BEGIN(pop) { bucket->key[j] = 0; clflush((char *)&bucket->key[j], sizeof(uintptr_t), true); } TX_END +#else + bucket->key[j] = 0; + clflush((char *)&bucket->key[j], sizeof(uintptr_t), true); +#endif LOCK_RLS(lock); return val; } @@ -647,10 +674,15 @@ clht_put_seq(clht_hashtable_t* hashtable, clht_addr_t key, clht_val_t val, uint6 { if (bucket->key[j] == 0) { +#if PMDK_TRANSACTION TX_BEGIN(pop) { bucket->val[j] = val; bucket->key[j] = key; } TX_END +#else + bucket->val[j] = val; + bucket->key[j] = key; +#endif return true; } } @@ -661,12 +693,17 @@ clht_put_seq(clht_hashtable_t* hashtable, clht_addr_t key, clht_val_t val, uint6 int null; bucket->next_off = pmemobj_oid(clht_bucket_create()).off; bucket_t* bucket_ptr = clht_ptr_from_off(bucket->next_off); +#if PMDK_TRANSACTION TX_BEGIN(pop) { bucket_ptr->val[0] = val; bucket_ptr->key[0] = key; } TX_ONABORT { printf("Failed clht_put_seq, rolling back\n"); } TX_END +#else + bucket_ptr->val[0] = val; + bucket_ptr->key[0] = key; +#endif return true; } @@ -1041,11 +1078,15 @@ ht_status(clht_t* h, int resize_increase, int just_print) { // printf("[STATUS-%02d] #bu: %7zu / #elems: %7zu / 
full%%: %8.4f%% / expands: %4d / max expands: %2d\n", // clht_gc_get_id(), hashtable->num_buckets, size, full_ratio, expands, expands_max); +#if PMDK_TRANSACTION TX_BEGIN(pop) { ht_resize_pes(h, 0, 33); } TX_ONABORT { printf("Failed ht_resize_pes, rolling back\n"); } TX_END +#else + ht_resize_pes(h, 0, 33); +#endif } else if ((full_ratio > 0 && full_ratio > CLHT_PERC_FULL_DOUBLE) || expands_max > CLHT_MAX_EXPANSIONS || resize_increase) @@ -1061,11 +1102,15 @@ ht_status(clht_t* h, int resize_increase, int just_print) } DEBUG_PRINT("Callig ht_resize_pes\n"); int ret = 0; +#if PMDK_TRANSACTION TX_BEGIN(pop) { ret = ht_resize_pes(h, 1, inc_by_pow2); } TX_ONABORT { printf("Failed ht_resize_pes, rolling back\n"); } TX_END; +#else + ret = ht_resize_pes(h, 1, inc_by_pow2); +#endif // return if crashed if (ret == -1) return 0; From f4243fe5cf57454f6c1edc054c531f50ace926b4 Mon Sep 17 00:00:00 2001 From: pyrito Date: Wed, 8 Apr 2020 15:56:38 -0500 Subject: [PATCH 18/30] Documentation changes --- P-CLHT/pmdk.md | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/P-CLHT/pmdk.md b/P-CLHT/pmdk.md index 4a19f2bb..a5a056b6 100644 --- a/P-CLHT/pmdk.md +++ b/P-CLHT/pmdk.md @@ -7,6 +7,17 @@ This branch of P-CLHT uses PMDK to ensure the persistence and recoverability of **How We Used PMDK** The entire conversion required us to replace any data structure pointers to point to the persistent memory pool using the non-transactional, atomic allocation functions such as `pmemobj_alloc`. Since the `PMEMoid` structs (which store the pool offset and id) were 16 bytes, some code manipulation was required to ensure the cache-line alignment of the data structure. Finally, transactions were used for major hashtable operations such as insertion, resizing, and deletion. This part is still being tested and is a work-in-progress. If you look through the code and compare it with the `master` branch, you can see that the changes follow a logical pattern, and the modifications are relatively minor. **How to test recoverability?** The best way to recover your hashtable is following the paradigm presented in `clht_open` where all the user has to do is use `pmemobj_root` to recover the root (a clht_t object basically) of the persistent memory pool. Please make sure that you are opening the same pool with the correct pool layout! +``` +... +PMEMoid my_root = pmemobj_root(pop, sizeof(clht_t)); +if (pmemobj_direct(my_root) == NULL) +{ + perror("root pointer is null\n"); +} +... +clht_t* w = pmemobj_direct(my_root); +... +``` ## Build & Run ### How to enable PM? @@ -22,7 +33,20 @@ $ cd .. $ sudo mount -o dax /dev/pmem0 /mnt/pmem ``` -3. Set pool_size and pool name in clht_lb_res.c. TODO: instructions to set up environment variables instead. +3. Set pool_size and pool name appropriately using `pmemobj_open` or `pmemobj_create`. For example: +``` +// Size of the memory pool +size_t pool_size = 2*1024*1024*1024UL; +if( access("/mnt/pmem/pool", F_OK ) != -1 ) +{ + // If the pool already exists, open it + pop = pmemobj_open("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht)); +} else +{ + // If the pool does not exist, create it + pop = pmemobj_create("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht), pool_size, 0666); +} +``` 4. Make accordingly and run the example. 
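Before the Masstree work below, the recovery path that pmdk.md describes for P-CLHT can be strung together in one place. The sketch that follows is only an illustration of that paradigm, not code from the branch: it assumes the pool at /mnt/pmem/pool was created with the "clht" layout, that the globals used by clht_lb_res.c (pop, pool_uuid) are visible from the caller, and that clht_lock_initialization is used to reset the volatile locks after a restart.

```
/* Recovery sketch following the paradigm described in pmdk.md (assumptions
 * listed above; error handling reduced to the bare minimum). */
#include <libpmemobj.h>
#include "clht_lb_res.h"

extern PMEMobjpool *pop;     /* global pool pointer used by the implementation */

clht_t *recover_clht(void)
{
    pop = pmemobj_open("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht));
    if (pop == NULL)
        return NULL;                       /* missing pool or wrong layout */

    PMEMoid root = pmemobj_root(pop, sizeof(clht_t));
    clht_t *h = pmemobj_direct(root);
    if (h == NULL)
        return NULL;

    pool_uuid = root.pool_uuid_lo;         /* needed by clht_ptr_from_off() */

    /* Locks are volatile state: reset them before using the table again. */
    clht_lock_initialization(h);
    return h;
}
```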
From fb11b105d1d6440bb7aa83afe5ffb5a58ae5af5b Mon Sep 17 00:00:00 2001 From: sekwonlee Date: Thu, 9 Apr 2020 19:04:19 -0500 Subject: [PATCH 19/30] Initial commit applying PMDK to masstree --- P-Masstree/CMakeLists.txt | 11 +- P-Masstree/masstree.cpp | 426 +++++++++++++++++++++----------------- P-Masstree/masstree.h | 38 ++-- 3 files changed, 276 insertions(+), 199 deletions(-) diff --git a/P-Masstree/CMakeLists.txt b/P-Masstree/CMakeLists.txt index 4883fb1d..36b95dc1 100644 --- a/P-Masstree/CMakeLists.txt +++ b/P-Masstree/CMakeLists.txt @@ -33,7 +33,16 @@ endif() find_library(JemallocLib jemalloc) find_library(TbbLib tbb) +if(PKG_CONFIG_FOUND) + pkg_check_modules(LIBPMEMOBJ++ REQUIRED libpmemobj++) +else() + find_package(LIBPMEMOBJ++ REQUIRED) +endif() + +link_directories(${LIBPMEMOBJ++_LIBRARY_DIRS}) + set(P_MASS_TEST example.cpp masstree.cpp) add_executable(example ${P_MASS_TEST}) -target_link_libraries(example ${JemallocLib} ${TbbLib} atomic boost_system boost_thread) +target_link_libraries(example ${JemallocLib} ${TbbLib} + ${LIBPMEMOBJ++_LIBRARIES} atomic boost_system boost_thread) diff --git a/P-Masstree/masstree.cpp b/P-Masstree/masstree.cpp index 766b8247..f2e6a239 100644 --- a/P-Masstree/masstree.cpp +++ b/P-Masstree/masstree.cpp @@ -2,6 +2,10 @@ namespace masstree { +/* Global pool pointer */ +PMEMobjpool *pop; +uint64_t pool_uuid; + static constexpr uint64_t CACHE_LINE_SIZE = 64; static uint64_t CPU_FREQ_MHZ = 2100; static unsigned long write_latency = 0; @@ -56,6 +60,12 @@ static inline void prefetch_(const void *ptr) asm volatile("prefetcht0 %0" : : "m" (*(const cacheline_t *)ptr)); } +static inline void *ptr_from_off(uint64_t offset) +{ + PMEMoid oid = {pool_uuid, offset}; + return pmemobj_direct(oid); +} + #ifdef LOCK_INIT static tbb::concurrent_vector lock_initializer; void lock_initialization() @@ -67,23 +77,48 @@ void lock_initialization() } #endif -masstree::masstree() { +masstree::masstree () { leafnode *init_root = new leafnode(0); - root_ = init_root; - clflush((char *)root_, sizeof(leafnode), true); + root_ = pmemobj_oid(init_root).off; + clflush((char *)ptr_from_off(root_), sizeof(leafnode), true); } masstree::masstree (void *new_root) { - root_ = new_root; - clflush((char *)root_, sizeof(leafnode), true); // 304 is the leafnode size of masstree + root_ = pmemobj_oid(new_root).off; + clflush((char *)ptr_from_off(root_), sizeof(leafnode), true); // 304 is the leafnode size of masstree +} + +void *masstree::operator new(size_t size) { + // Open the PMEMpool if it exists, otherwise create it. 
+ size_t pool_size = 2*1024*1024*1024UL; + if (access("/mnt/pmem/pool", F_OK) != -1) + pop = pmemobj_open("/mnt/pmem/pool", POBJ_LAYOUT_NAME(p_masstree)); + else + pop = pmemobj_create("/mnt/pmem/pool", "p_masstree", pool_size, 0666); + + if (pop == NULL) + perror("failed to open the pool\n"); + + // Create the root pointer + PMEMoid my_root = pmemobj_root(pop, size); + if (pmemobj_direct(my_root) == NULL) + perror("root pointer is null\n"); + pool_uuid = my_root.pool_uuid_lo; + + return pmemobj_direct(my_root); } leafnode::leafnode(uint32_t level) : permutation(permuter::make_empty()) { level_ = level; version_ = 0; - wlock = new std::mutex(); - next = NULL; - leftmost_ptr = NULL; + PMEMoid ret; + if (pmemobj_zalloc(pop, &ret, sizeof(PMEMmutex), 0)) { + fprintf(stderr, "pmemobj_zalloc failed for lock allocation\n"); + assert(0); + } + wlock = ret.off; + next = 0; + leftmost_ptr = 0; highest = 0; #ifdef LOCK_INIT lock_initializer.push_back(wlock); @@ -93,13 +128,18 @@ leafnode::leafnode(uint32_t level) : permutation(permuter::make_empty()) { leafnode::leafnode(void *left, uint64_t key, void *right, uint32_t level = 1) : permutation(permuter::make_empty()) { level_ = level; version_ = 0; - wlock = new std::mutex(); - next = NULL; + PMEMoid ret; + if (pmemobj_zalloc(pop, &ret, sizeof(PMEMmutex), 0)) { + fprintf(stderr, "pmemobj_zalloc failed for lock allocation\n"); + assert(0); + } + wlock = ret.off; + next = 0; highest = 0; - leftmost_ptr = reinterpret_cast (left); + leftmost_ptr = pmemobj_oid(left).off; entry[0].key = key; - entry[0].value = right; + entry[0].value = pmemobj_oid(right).off; permutation = permuter::make_sorted(1); #ifdef LOCK_INIT @@ -108,16 +148,19 @@ leafnode::leafnode(void *left, uint64_t key, void *right, uint32_t level = 1) : } void *leafnode::operator new(size_t size) { - void *ret; - posix_memalign(&ret, CACHE_LINE_SIZE, size); - return ret; + PMEMoid ret; + if (pmemobj_alloc(pop, &ret, size, 0, 0, 0)) { + fprintf(stderr, "pmemobj_alloc failed for leaf allocation\n"); + assert(0); + } + return pmemobj_direct(ret); } -void leafnode::lock() {wlock->lock();} +void leafnode::lock() {pmemobj_mutex_lock(pop, (PMEMmutex *)ptr_from_off(wlock));} -void leafnode::unlock() {wlock->unlock();} +void leafnode::unlock() {pmemobj_mutex_unlock(pop, (PMEMmutex *)ptr_from_off(wlock));} -bool leafnode::trylock() {return wlock->try_lock();} +int leafnode::trylock() {return pmemobj_mutex_trylock(pop, (PMEMmutex *)ptr_from_off(wlock));} int leafnode::compare_key(const uint64_t a, const uint64_t b) { @@ -169,7 +212,8 @@ leafnode *leafnode::advance_to_key(const uint64_t& key, bool checker) const leafnode *n = this; leafnode *next; - if ((next = n->next) && compare_key(key, next->highest) >= 0) { + if ((next = reinterpret_cast(ptr_from_off(n->next))) + && compare_key(key, next->highest) >= 0) { // if (!checker) { // printf("Reader must not come here\n"); // exit(0); @@ -204,11 +248,15 @@ void leafnode::prefetch() const leafvalue *masstree::make_leaf(char *key, size_t key_len, uint64_t value) { - void *aligned_alloc; size_t len = (key_len % sizeof(uint64_t)) == 0 ? 
key_len : (((key_len) / sizeof(uint64_t)) + 1) * sizeof(uint64_t); - posix_memalign(&aligned_alloc, CACHE_LINE_SIZE, sizeof(leafvalue) + len + sizeof(uint64_t)); - leafvalue *lv = reinterpret_cast (aligned_alloc); + PMEMoid ret; + if (pmemobj_alloc(pop, &ret, sizeof(leafvalue) + len + sizeof(uint64_t), 0, 0, 0)) { + fprintf(stderr, "pmemobj_alloc failed for leaf allocation\n"); + assert(0); + } + + leafvalue *lv = reinterpret_cast (pmemobj_direct(ret)); memset(lv, 0, sizeof(leafvalue) + len + sizeof(uint64_t)); lv->value = value; @@ -225,11 +273,15 @@ leafvalue *masstree::make_leaf(char *key, size_t key_len, uint64_t value) leafvalue *leafnode::smallest_leaf(size_t key_len, uint64_t value) { - void *aligned_alloc; size_t len = (key_len % sizeof(uint64_t)) == 0 ? key_len : (((key_len) / sizeof(uint64_t)) + 1) * sizeof(uint64_t); - posix_memalign(&aligned_alloc, CACHE_LINE_SIZE, sizeof(leafvalue) + len); - leafvalue *lv = reinterpret_cast (aligned_alloc); + PMEMoid ret; + if (pmemobj_alloc(pop, &ret, sizeof(leafvalue) + len, 0, 0, 0)) { + fprintf(stderr, "pmemobj_alloc failed for leaf allocation\n"); + assert(0); + } + + leafvalue *lv = reinterpret_cast (pmemobj_direct(ret)); memset(lv, 0, sizeof(leafvalue) + len); lv->value = value; @@ -255,7 +307,7 @@ void leafnode::make_new_layer(leafnode *l, key_indexed_position &kx_, leafvalue leafnode *nl = new leafnode(0); nl->assign_initialize_for_layer(0, olv->fkey[depth]); if (twig_head != l) - twig_tail->entry[0].value = nl; + twig_tail->entry[0].value = pmemobj_oid(nl).off; else twig_head = nl; nl->permutation = permuter::make_sorted(1); @@ -265,30 +317,30 @@ void leafnode::make_new_layer(leafnode *l, key_indexed_position &kx_, leafvalue } leafnode *nl = new leafnode(0); - nl->assign_initialize(0, kcmp < 0 ? olv->fkey[depth] : nlv->fkey[depth], kcmp < 0 ? SET_LV(olv) : SET_LV(nlv)); - nl->assign_initialize(1, kcmp < 0 ? nlv->fkey[depth] : olv->fkey[depth], kcmp < 0 ? SET_LV(nlv) : SET_LV(olv)); + nl->assign_initialize(0, kcmp < 0 ? olv->fkey[depth] : nlv->fkey[depth], kcmp < 0 ? SET_LV(pmemobj_oid(olv).off) : SET_LV(pmemobj_oid(nlv).off)); + nl->assign_initialize(1, kcmp < 0 ? nlv->fkey[depth] : olv->fkey[depth], kcmp < 0 ? 
SET_LV(pmemobj_oid(nlv).off) : SET_LV(pmemobj_oid(olv).off)); nl->permutation = permuter::make_sorted(2); fence(); if (twig_tail != l) - twig_tail->entry[0].value = nl; + twig_tail->entry[0].value = pmemobj_oid(nl).off; twig_tail = nl; if (twig_head != l) { leafnode *iter = twig_head; mfence(); - for ( ; iter != twig_tail && iter != NULL; iter = reinterpret_cast (iter->entry[0].value)) { + for ( ; iter != twig_tail && iter != NULL; iter = reinterpret_cast (ptr_from_off(iter->entry[0].value))) { clflush((char *)iter, sizeof(leafnode), false); } clflush((char *)twig_tail, sizeof(leafnode), false); mfence(); - l->entry[kx_.p].value = twig_head; + l->entry[kx_.p].value = pmemobj_oid(twig_head).off; clflush((char *)l->entry_addr(kx_.p) + 8, sizeof(uintptr_t), true); } else { clflush((char *)nl, sizeof(leafnode), true); - l->entry[kx_.p].value = nl; + l->entry[kx_.p].value = pmemobj_oid(nl).off; clflush((char *)l->entry_addr(kx_.p) + 8, sizeof(uintptr_t), true); } } @@ -313,10 +365,10 @@ void leafnode::check_for_recovery(masstree *t, leafnode *left, leafnode *right, if (depth > 0) { key_indexed_position pkx_; leafnode *p = correct_layer_root(root, lv, depth, pkx_); - if (p->value(pkx_.p) == left) { + if (p->value(pkx_.p) == pmemobj_oid(left).off) { leafnode *new_root = new leafnode(left, right->highest, right, left->level() + 1); clflush((char *) new_root, sizeof(leafnode), true); - p->entry[pkx_.p].value = new_root; + p->entry[pkx_.p].value = pmemobj_oid(new_root).off; clflush((char *) &p->entry[pkx_.p].value, sizeof(uintptr_t), true); p->unlock(); @@ -324,10 +376,10 @@ void leafnode::check_for_recovery(masstree *t, leafnode *left, leafnode *right, left->unlock(); } else { root = p; - t->split(p->entry[pkx_.p].value, root, depth, lv, right->highest, right, left->level() + 1, left); + t->split(ptr_from_off(p->entry[pkx_.p].value), root, depth, lv, right->highest, right, left->level() + 1, left); } } else { - if (t->root() == left) { + if (t->root() == pmemobj_oid(left).off) { leafnode *new_root = new leafnode(left, right->highest, right, left->level() + 1); clflush((char *) new_root, sizeof(leafnode), true); t->setNewRoot(new_root); @@ -347,14 +399,14 @@ void masstree::put(uint64_t key, void *value) leafnode *next; from_root: - leafnode *p = reinterpret_cast (this->root_); + leafnode *p = reinterpret_cast (ptr_from_off(this->root_)); while (p->level() != 0) { inter_retry: next = p->advance_to_key(key, true); if (next != p) { // check for recovery - if (p->trylock()) { - if (next->trylock()) + if (p->trylock() == 0) { + if (next->trylock() == 0) p->check_for_recovery(this, p, next, NULL, 0, NULL); else p->unlock(); @@ -368,19 +420,19 @@ void masstree::put(uint64_t key, void *value) kx_ = p->key_lower_bound(key); - void *snapshot_v; + uint64_t snapshot_v; if (kx_.i >= 0) { snapshot_v = p->value(kx_.p); fence(); if (p->key(kx_.p) <= key) { if (snapshot_v == p->value(kx_.p)) - p = reinterpret_cast(snapshot_v); + p = reinterpret_cast(ptr_from_off(snapshot_v)); else { goto inter_retry; } } } else { - p = p->leftmost(); + p = reinterpret_cast(ptr_from_off(p->leftmost())); } } @@ -389,8 +441,8 @@ void masstree::put(uint64_t key, void *value) next = l->advance_to_key(key, true); if (next != l) { //check for recovery - if (l->trylock()) { - if (next->trylock()) + if (l->trylock() == 0) { + if (next->trylock() == 0) l->check_for_recovery(this, l, next, NULL, 0, NULL); else l->unlock(); @@ -430,7 +482,7 @@ void masstree::put(uint64_t key, void *value) void masstree::put(char *key, uint64_t value) { restart: 
- void *root = this->root_; + void *root = ptr_from_off(this->root_); key_indexed_position kx_; uint32_t depth = 0; leafnode *next; @@ -444,8 +496,8 @@ void masstree::put(char *key, uint64_t value) next = p->advance_to_key(lv->fkey[depth], true); if (next != p) { // check for recovery - if (p->trylock()) { - if (next->trylock()) + if (p->trylock() == 0) { + if (next->trylock() == 0) p->check_for_recovery(this, p, next, root, depth, lv); else p->unlock(); @@ -459,19 +511,19 @@ void masstree::put(char *key, uint64_t value) kx_ = p->key_lower_bound(lv->fkey[depth]); - void *snapshot_v; + uint64_t snapshot_v; if (kx_.i >= 0) { snapshot_v = p->value(kx_.p); fence(); if (p->key(kx_.p) <= lv->fkey[depth]) { if (snapshot_v == p->value(kx_.p)) - p = reinterpret_cast(snapshot_v); + p = reinterpret_cast(ptr_from_off(snapshot_v)); else { goto inter_retry; } } } else { - p = p->leftmost(); + p = reinterpret_cast(ptr_from_off(p->leftmost())); } } @@ -480,8 +532,8 @@ void masstree::put(char *key, uint64_t value) next = l->advance_to_key(lv->fkey[depth], true); if (next != l) { //check for recovery - if (l->trylock()) { - if (next->trylock()) + if (l->trylock() == 0) { + if (next->trylock() == 0) l->check_for_recovery(this, l, next, root, depth, lv); else l->unlock(); @@ -511,27 +563,27 @@ void masstree::put(char *key, uint64_t value) if (kx_.p >= 0) { // i) If there is additional layer, retry B+tree traversing from the next layer if (!IS_LV(l->value(kx_.p))) { - p = reinterpret_cast (l->value(kx_.p)); + p = reinterpret_cast (ptr_from_off(l->value(kx_.p))); root = l; depth++; l->unlock(); goto from_root; // ii) Atomically update value for the matching key - } else if (IS_LV(l->value(kx_.p)) && (LV_PTR(l->value(kx_.p)))->key_len == lv->key_len && - memcmp(lv->fkey, (LV_PTR(l->value(kx_.p)))->fkey, lv->key_len) == 0) { - (LV_PTR(l->value(kx_.p)))->value = value; - clflush((char *)&(LV_PTR(l->value(kx_.p)))->value, sizeof(void *), true); + } else if (IS_LV(l->value(kx_.p)) && ((leafvalue *)ptr_from_off((LV_PTR(l->value(kx_.p)))))->key_len == lv->key_len && + memcmp(lv->fkey, ((leafvalue *)ptr_from_off((LV_PTR(l->value(kx_.p)))))->fkey, lv->key_len) == 0) { + ((leafvalue *)ptr_from_off((LV_PTR(l->value(kx_.p)))))->value = value; + clflush((char *)&((leafvalue *)ptr_from_off((LV_PTR(l->value(kx_.p)))))->value, sizeof(void *), true); l->unlock(); // iii) Allocate additional layers (B+tree's roots) up to // the number of common prefixes (8bytes unit). 
// Insert two keys to the leafnode in the last layer // During these processes, this leafnode must be locked } else { - l->make_new_layer(l, kx_, LV_PTR(l->value(kx_.p)), lv, ++depth); + l->make_new_layer(l, kx_, ((leafvalue *)ptr_from_off(LV_PTR(l->value(kx_.p)))), lv, ++depth); l->unlock(); } } else { - if (!(l->leaf_insert(this, root, depth, lv, lv->fkey[depth], SET_LV(lv), kx_, true, true, NULL))) { + if (!(l->leaf_insert(this, root, depth, lv, lv->fkey[depth], SET_LV(pmemobj_oid(lv).off), kx_, true, true, NULL))) { put(key, value); } } @@ -539,11 +591,11 @@ void masstree::put(char *key, uint64_t value) void masstree::del(uint64_t key) { - void *root = this->root_; + void *root = ptr_from_off(this->root_); key_indexed_position kx_; uint32_t depth = 0; leafnode *next; - void *snapshot_v; + uint64_t snapshot_v; leafnode *p = reinterpret_cast (root); while (p->level() != 0) { @@ -551,8 +603,8 @@ void masstree::del(uint64_t key) next = p->advance_to_key(key, true); if (next != p) { // check for recovery - if (p->trylock()) { - if (next->trylock()) + if (p->trylock() == 0) { + if (next->trylock() == 0) p->check_for_recovery(this, p, next, NULL, 0, NULL); else p->unlock(); @@ -570,12 +622,12 @@ void masstree::del(uint64_t key) fence(); if (p->key(kx_.p) <= key) { if (snapshot_v == p->value(kx_.p)) - p = reinterpret_cast(snapshot_v); + p = reinterpret_cast(ptr_from_off(snapshot_v)); else goto inter_retry; } } else { - p = p->leftmost(); + p = reinterpret_cast(ptr_from_off(p->leftmost())); } } @@ -584,8 +636,8 @@ void masstree::del(uint64_t key) next = l->advance_to_key(key, true); if (next != l) { //check for recovery - if (l->trylock()) { - if (next->trylock()) + if (l->trylock() == 0) { + if (next->trylock() == 0) l->check_for_recovery(this, l, next, NULL, 0, NULL); else l->unlock(); @@ -617,7 +669,7 @@ void masstree::del(uint64_t key) void masstree::del(char *key) { - void *root = this->root_; + void *root = ptr_from_off(this->root_); key_indexed_position kx_; uint32_t depth = 0; leafnode *next; @@ -631,8 +683,8 @@ void masstree::del(char *key) next = p->advance_to_key(lv->fkey[depth], true); if (next != p) { // check for recovery - if (p->trylock()) { - if (next->trylock()) + if (p->trylock() == 0) { + if (next->trylock() == 0) p->check_for_recovery(this, p, next, root, depth, lv); else p->unlock(); @@ -646,19 +698,19 @@ void masstree::del(char *key) kx_ = p->key_lower_bound(lv->fkey[depth]); - void *snapshot_v; + uint64_t snapshot_v; if (kx_.i >= 0) { snapshot_v = p->value(kx_.p); fence(); if (p->key(kx_.p) <= lv->fkey[depth]) { if (snapshot_v == p->value(kx_.p)) - p = reinterpret_cast(snapshot_v); + p = reinterpret_cast(ptr_from_off(snapshot_v)); else { goto inter_retry; } } } else { - p = p->leftmost(); + p = reinterpret_cast(ptr_from_off(p->leftmost())); } } @@ -667,8 +719,8 @@ void masstree::del(char *key) next = l->advance_to_key(lv->fkey[depth], true); if (next != l) { //check for recovery - if (l->trylock()) { - if (next->trylock()) + if (l->trylock() == 0) { + if (next->trylock() == 0) l->check_for_recovery(this, l, next, root, depth, lv); else l->unlock(); @@ -693,14 +745,14 @@ void masstree::del(char *key) if (kx_.p >= 0) { // i) If there is additional layer, retry B+tree traversing from the next layer if (!IS_LV(l->value(kx_.p))) { - p = reinterpret_cast (l->value(kx_.p)); + p = reinterpret_cast (ptr_from_off(l->value(kx_.p))); root = l; depth++; l->unlock(); goto from_root; // ii) Checking false-positive result and starting to delete it - } else if (IS_LV(l->value(kx_.p)) && 
(LV_PTR(l->value(kx_.p)))->key_len == lv->key_len && - memcmp(lv->fkey, (LV_PTR(l->value(kx_.p)))->fkey, lv->key_len) == 0) { + } else if (IS_LV(l->value(kx_.p)) && ((leafvalue *)ptr_from_off((LV_PTR(l->value(kx_.p)))))->key_len == lv->key_len && + memcmp(lv->fkey, ((leafvalue *)ptr_from_off((LV_PTR(l->value(kx_.p)))))->fkey, lv->key_len) == 0) { if (!(l->leaf_delete(this, root, depth, lv, lv->fkey[depth], kx_, true, true, NULL))) { del(key); } @@ -717,7 +769,7 @@ void masstree::del(char *key) inline void leafnode::assign_initialize(int p, const uint64_t& key, void *value) { entry[p].key = key; - entry[p].value = value; + entry[p].value = (uint64_t) value; } inline void leafnode::assign_initialize(int p, leafnode *x, int xp) @@ -756,8 +808,8 @@ int leafnode::split_into(leafnode *nr, int p, const uint64_t& key, void *value, nr->highest = nr->entry[0].key; nr->next = this->next; clflush((char *)nr, sizeof(leafnode), true); - this->next = nr; - clflush((char *)(&this->next), sizeof(uintptr_t), true); + this->next = pmemobj_oid(nr).off; + clflush((char *)(&this->next), sizeof(uint64_t), true); split_key = nr->highest; return p >= mid ? 1 + (mid == LEAF_WIDTH) : 0; @@ -779,12 +831,12 @@ void leafnode::split_into_inter(leafnode *nr, int p, const uint64_t& key, void * nr->permutation = permr.value(); //leafnode::link_split(this, nr); - nr->leftmost_ptr = reinterpret_cast(this->entry[perml[mid - 1]].value); + nr->leftmost_ptr = this->entry[perml[mid - 1]].value; nr->highest = this->entry[perml[mid - 1]].key; nr->next = this->next; clflush((char *)nr, sizeof(leafnode), true); - this->next = nr; - clflush((char *)(&this->next), sizeof(uintptr_t), true); + this->next = pmemobj_oid(nr).off; + clflush((char *)(&this->next), sizeof(uint64_t), true); split_key = nr->highest; //return p >= mid ? 
1 + (mid == LEAF_WIDTH) : 0; @@ -794,13 +846,15 @@ void leafnode::assign(int p, const uint64_t& key, void *value) { entry[p].key = key; fence(); - entry[p].value = value; + // TODO: Need to add an identifier about which DIMM includes this value + entry[p].value = (uint64_t) value; } void leafnode::assign_value(int p, void *value) { - entry[p].value = value; - clflush((char *)&entry[p].value, sizeof(void *), true); + // TODO: Need to add identifier about which DIMM includes this value + entry[p].value = (uint64_t) value; + clflush((char *)&entry[p].value, sizeof(uint64_t), true); } void *leafnode::entry_addr(int p) @@ -810,8 +864,8 @@ void *leafnode::entry_addr(int p) void masstree::setNewRoot(void *new_root) { - this->root_ = new_root; - clflush((char *)&this->root_, sizeof(void *), true); + this->root_ = pmemobj_oid(new_root).off; + clflush((char *)&this->root_, sizeof(uint64_t), true); } leafnode *leafnode::correct_layer_root(void *root, leafvalue *lv, uint32_t depth, key_indexed_position &pkx_) @@ -865,24 +919,24 @@ leafnode *leafnode::search_for_leftsibling(void *root, uint64_t key, uint32_t le kx_ = p->key_lower_bound(key); - void *snapshot_v; + uint64_t snapshot_v; if (kx_.i >= 0) { snapshot_v = p->value(kx_.p); fence(); if (p->key(kx_.p) <= key) { if (snapshot_v == p->value(kx_.p)) - p = reinterpret_cast(snapshot_v); + p = reinterpret_cast(ptr_from_off(snapshot_v)); else { goto inter_retry; } } } else { - p = p->leftmost(); + p = reinterpret_cast(ptr_from_off(p->leftmost())); } } leaf_retry: - if (p->trylock()) { + if (p->trylock() == 0) { next = p->advance_to_key(key, true); if (next != p) { p->unlock(); @@ -933,9 +987,10 @@ void *leafnode::leaf_insert(masstree *t, void *root, uint32_t depth, leafvalue * // 2) replay the original split process from the third step that removes the half of // the entries from the left sibling. 
(this would be more reasonable in terms of // reusing the existing split mechanism) - if (this->next != NULL && this->key(this->permutation[this->permutation.size() - 1]) > this->next->highest) { - this->next = this->next->next; - clflush((char *)&this->next, sizeof(leafnode *), true); + if (this->next != 0 && this->key(this->permutation[this->permutation.size() - 1]) + > ((leafnode *)ptr_from_off(this->next))->highest) { + this->next = ((leafnode *)ptr_from_off(this->next))->next; + clflush((char *)&this->next, sizeof(uint64_t), true); } leafnode *new_sibling = new leafnode(this->level_); @@ -960,18 +1015,18 @@ void *leafnode::leaf_insert(masstree *t, void *root, uint32_t depth, leafvalue * if (depth > 0) { key_indexed_position pkx_; leafnode *p = correct_layer_root(root, lv, depth, pkx_); - if (p->value(pkx_.p) == this) { + if (p->value(pkx_.p) == pmemobj_oid(this).off) { leafnode *new_root = new leafnode(this, split_key, new_sibling, level_ + 1); clflush((char *) new_root, sizeof(leafnode), true); - p->entry[pkx_.p].value = new_root; + p->entry[pkx_.p].value = pmemobj_oid(new_root).off; clflush((char *) &p->entry[pkx_.p].value, sizeof(uintptr_t), true); p->unlock(); } else { root = p; - t->split(p->entry[pkx_.p].value, root, depth, lv, split_key, new_sibling, level_ + 1, NULL); + t->split(ptr_from_off(p->entry[pkx_.p].value), root, depth, lv, split_key, new_sibling, level_ + 1, NULL); } } else { - if (t->root() == this) { + if (t->root() == pmemobj_oid(this).off) { leafnode *new_root = new leafnode(this, split_key, new_sibling, level_ + 1); clflush((char *) new_root, sizeof(leafnode), true); t->setNewRoot(new_root); @@ -1036,7 +1091,7 @@ void *leafnode::leaf_delete(masstree *t, void *root, uint32_t depth, leafvalue * if (depth > 0) { key_indexed_position pkx_; leafnode *p = correct_layer_root(root, lv, depth, pkx_); - if (p->value(pkx_.p) == nr) { + if (p->value(pkx_.p) == pmemobj_oid(nr).off) { cp = nr->permutation.value(); cp = cp.make_empty(); fence(); @@ -1046,17 +1101,17 @@ void *leafnode::leaf_delete(masstree *t, void *root, uint32_t depth, leafvalue * nr->unlock(); return nr; } else { - nl = search_for_leftsibling(p->entry[pkx_.p].value, nr->highest ? nr->highest - 1 : nr->highest, nr->level_, nr); - merge_state = t->merge(p->entry[pkx_.p].value, reinterpret_cast (p), depth, lv, nr->highest, nr->level_ + 1, NULL); + nl = search_for_leftsibling(ptr_from_off(p->entry[pkx_.p].value), nr->highest ? nr->highest - 1 : nr->highest, nr->level_, nr); + merge_state = t->merge(ptr_from_off(p->entry[pkx_.p].value), reinterpret_cast (p), depth, lv, nr->highest, nr->level_ + 1, NULL); if (merge_state == 16) { p = correct_layer_root(root, lv, depth, pkx_); - p->entry[pkx_.p].value = nr; + p->entry[pkx_.p].value = pmemobj_oid(nr).off; clflush((char *)&p->entry[pkx_.p].value, sizeof(void *), true); p->unlock(); } } } else { - if (t->root() == nr) { + if (t->root() == pmemobj_oid(nr).off) { cp = nr->permutation.value(); cp = cp.make_empty(); fence(); @@ -1065,7 +1120,7 @@ void *leafnode::leaf_delete(masstree *t, void *root, uint32_t depth, leafvalue * nr->unlock(); return nr; } else { - nl = search_for_leftsibling(t->root(), nr->highest ? nr->highest - 1 : nr->highest, nr->level_, nr); + nl = search_for_leftsibling(ptr_from_off(t->root()), nr->highest ? 
nr->highest - 1 : nr->highest, nr->level_, nr); merge_state = t->merge(NULL, NULL, 0, NULL, nr->highest, nr->level_ + 1, NULL); if (merge_state == 16) t->setNewRoot(nr); @@ -1105,7 +1160,7 @@ void *leafnode::inter_insert(masstree *t, void *root, uint32_t depth, leafvalue // permutation based insert if (this->permutation.size() < LEAF_WIDTH) { kx_.p = this->permutation.back(); - this->assign(kx_.p, key, value); + this->assign(kx_.p, key, (void *)pmemobj_oid(value).off); clflush((char *)(&this->entry[kx_.p]), sizeof(kv), true); permuter cp = this->permutation.value(); @@ -1115,7 +1170,7 @@ void *leafnode::inter_insert(masstree *t, void *root, uint32_t depth, leafvalue clflush((char *)(&this->permutation), sizeof(permuter), true); if (child != NULL) { - child->next->unlock(); + ((leafnode *)ptr_from_off(child->next))->unlock(); child->unlock(); } @@ -1137,9 +1192,10 @@ void *leafnode::inter_insert(masstree *t, void *root, uint32_t depth, leafvalue // 2) replay the original split process from the third step that removes the half of // the entries from the left sibling. (this would be more reasonable in terms of // reusing the existing split mechanism) - if (this->next != NULL && this->key(this->permutation[this->permutation.size() - 1]) > this->next->highest) { - this->next = this->next->next; - clflush((char *)&this->next, sizeof(leafnode *), true); + if (this->next != 0 && this->key(this->permutation[this->permutation.size() - 1]) + > ((leafnode *)ptr_from_off(this->next))->highest) { + this->next = ((leafnode *)ptr_from_off(this->next))->next; + clflush((char *)&this->next, sizeof(uint64_t), true); } leafnode *new_sibling = new leafnode(this->level_); @@ -1164,7 +1220,7 @@ void *leafnode::inter_insert(masstree *t, void *root, uint32_t depth, leafvalue if (key < split_key) { kx_.p = nl->permutation.back(); - nl->assign(kx_.p, key, value); + nl->assign(kx_.p, key, (void *)pmemobj_oid(value).off); clflush((char *)(&nl->entry[kx_.p]), sizeof(kv), true); permuter cp = nl->permutation.value(); @@ -1177,7 +1233,7 @@ void *leafnode::inter_insert(masstree *t, void *root, uint32_t depth, leafvalue } else { kx_ = nr->key_lower_bound_by(key); kx_.p = nr->permutation.back(); - nr->assign(kx_.p, key, value); + nr->assign(kx_.p, key, (void *)pmemobj_oid(value).off); clflush((char *)(&nr->entry[kx_.p]), sizeof(kv), true); permuter cp = nr->permutation.value(); @@ -1191,33 +1247,33 @@ void *leafnode::inter_insert(masstree *t, void *root, uint32_t depth, leafvalue // lock coupling (hand-over-hand locking) if (child != NULL) { - child->next->unlock(); + ((leafnode *)ptr_from_off(child->next))->unlock(); child->unlock(); } if (depth > 0) { key_indexed_position pkx_; leafnode *p = correct_layer_root(root, lv, depth, pkx_); - if (p->value(pkx_.p) == this) { + if (p->value(pkx_.p) == pmemobj_oid(this).off) { leafnode *new_root = new leafnode(this, split_key, new_sibling, level_ + 1); clflush((char *) new_root, sizeof(leafnode), true); - p->entry[pkx_.p].value = new_root; - clflush((char *) &p->entry[pkx_.p].value, sizeof(uintptr_t), true); + p->entry[pkx_.p].value = pmemobj_oid(new_root).off; + clflush((char *) &p->entry[pkx_.p].value, sizeof(uint64_t), true); p->unlock(); - this->next->unlock(); + ((leafnode *)ptr_from_off(this->next))->unlock(); this->unlock(); } else { root = p; - t->split(p->entry[pkx_.p].value, root, depth, lv, split_key, new_sibling, level_ + 1, this); + t->split(ptr_from_off(p->entry[pkx_.p].value), root, depth, lv, split_key, new_sibling, level_ + 1, this); } } else { - if (t->root() == 
this) { + if (t->root() == pmemobj_oid(this).off) { leafnode *new_root = new leafnode(this, split_key, new_sibling, level_ + 1); clflush((char *) new_root, sizeof(leafnode), true); t->setNewRoot(new_root); - this->next->unlock(); + ((leafnode *)ptr_from_off(this->next))->unlock(); this->unlock(); } else { t->split(NULL, NULL, 0, NULL, split_key, new_sibling, level_ + 1, this); @@ -1255,22 +1311,22 @@ int leafnode::inter_delete(masstree *t, void *root, uint32_t depth, leafvalue *l if (depth > 0) { key_indexed_position pkx_; leafnode *p = correct_layer_root(root, lv, depth, pkx_); - if (p->value(pkx_.p) == nr) { + if (p->value(pkx_.p) == pmemobj_oid(nr).off) { kx_.i = 16; p->unlock(); nr->unlock(); return (ret = kx_.i); } else { - nl = search_for_leftsibling(p->entry[pkx_.p].value, nr->highest ? nr->highest - 1 : nr->highest, nr->level_, nr); - merge_state = t->merge(p->entry[pkx_.p].value, root, depth, lv, nr->highest, nr->level_ + 1, nl); + nl = search_for_leftsibling(ptr_from_off(p->entry[pkx_.p].value), nr->highest ? nr->highest - 1 : nr->highest, nr->level_, nr); + merge_state = t->merge(ptr_from_off(p->entry[pkx_.p].value), root, depth, lv, nr->highest, nr->level_ + 1, nl); } } else { - if (t->root() == nr) { + if (t->root() == pmemobj_oid(nr).off) { kx_.i = 16; nr->unlock(); return (ret = kx_.i); } else { - nl = search_for_leftsibling(t->root(), nr->highest ? nr->highest - 1 : nr->highest, nr->level_, nr); + nl = search_for_leftsibling(ptr_from_off(t->root()), nr->highest ? nr->highest - 1 : nr->highest, nr->level_, nr); merge_state = t->merge(NULL, NULL, 0, NULL, nr->highest, nr->level_ + 1, nl); } } @@ -1308,9 +1364,9 @@ void masstree::split(void *left, void *root, uint32_t depth, leafvalue *lv, p = reinterpret_cast (left); reinterpret_cast (root)->unlock(); } else { - if (level > reinterpret_cast(root_)->level()) + if (level > reinterpret_cast(ptr_from_off(root_))->level()) return ; - p = reinterpret_cast (root_); + p = reinterpret_cast (ptr_from_off(root_)); } while (p->level() > level) { @@ -1326,19 +1382,19 @@ void masstree::split(void *left, void *root, uint32_t depth, leafvalue *lv, kx_ = p->key_lower_bound(key); - void *snapshot_v; + uint64_t snapshot_v; if (kx_.i >= 0) { snapshot_v = p->value(kx_.p); fence(); if (p->key(kx_.p) <= key) { if (snapshot_v == p->value(kx_.p)) - p = reinterpret_cast(snapshot_v); + p = reinterpret_cast(ptr_from_off(snapshot_v)); else { goto inter_retry; } } } else { - p = p->leftmost(); + p = reinterpret_cast(ptr_from_off(p->leftmost())); } } @@ -1375,7 +1431,7 @@ int masstree::merge(void *left, void *root, uint32_t depth, leafvalue *lv, key_indexed_position kx_; uint64_t oldv; leafnode *next; - void *snapshot_v; + uint64_t snapshot_v; if (depth > 0) { //if (level > reinterpret_cast(left)->level()) @@ -1385,7 +1441,7 @@ int masstree::merge(void *left, void *root, uint32_t depth, leafvalue *lv, } else { //if (level > reinterpret_cast(this->root_)->level()) // return ; - p = reinterpret_cast (this->root_); + p = reinterpret_cast (ptr_from_off(this->root_)); } while (p->level() > level) { @@ -1405,13 +1461,13 @@ int masstree::merge(void *left, void *root, uint32_t depth, leafvalue *lv, fence(); if (p->key(kx_.p) <= key) { if (snapshot_v == p->value(kx_.p)) - p = reinterpret_cast(snapshot_v); + p = reinterpret_cast(ptr_from_off(snapshot_v)); else { goto inter_retry; } } } else { - p = p->leftmost(); + p = reinterpret_cast(ptr_from_off(p->leftmost())); } } @@ -1435,7 +1491,7 @@ int masstree::merge(void *left, void *root, uint32_t depth, leafvalue *lv, 
void *masstree::get(uint64_t key) { - void *root = this->root_; + void *root = ptr_from_off(this->root_); key_indexed_position kx_; leafnode *next; @@ -1453,19 +1509,19 @@ void *masstree::get(uint64_t key) kx_ = p->key_lower_bound(key); - void *snapshot_v; + uint64_t snapshot_v; if (kx_.i >= 0) { snapshot_v = p->value(kx_.p); fence(); if (p->key(kx_.p) <= key) { if (snapshot_v == p->value(kx_.p)) - p = reinterpret_cast(snapshot_v); + p = reinterpret_cast(ptr_from_off(snapshot_v)); else { goto inter_retry; } } } else { - p = p->leftmost(); + p = reinterpret_cast(ptr_from_off(p->leftmost())); } } @@ -1476,11 +1532,11 @@ void *masstree::get(uint64_t key) kx_ = l->key_lower_bound_by(key); - void *snapshot_v = l->value(kx_.p); + uint64_t snapshot_v = l->value(kx_.p); fence(); if (kx_.p >= 0 && l->key(kx_.p) == key) { if (snapshot_v == l->value(kx_.p)) - return snapshot_v; + return (void *)snapshot_v; else { l = l->advance_to_key(key, false); goto leaf_retry; @@ -1500,10 +1556,10 @@ void *masstree::get(uint64_t key) } if (l->next_()) { - cp = l->next_()->permute(); - printf("next high key = %lu\n", l->next_()->highest_()); + cp = ((leafnode *)ptr_from_off(l->next_()))->permute(); + printf("next high key = %lu\n", ((leafnode *)ptr_from_off(l->next_()))->highest_()); for (int i = 0; i < cp.size(); i++) { - printf("next key = %lu\n", l->next_()->key(cp[i])); + printf("next key = %lu\n", ((leafnode *)ptr_from_off(l->next_()))->key(cp[i])); } } exit(0); @@ -1513,11 +1569,11 @@ void *masstree::get(uint64_t key) void *masstree::get(char *key) { - void *root = this->root_; + void *root = ptr_from_off(this->root_); key_indexed_position kx_; uint32_t depth = 0; leafnode *next; - void *snapshot_v; + uint64_t snapshot_v; leafvalue *lv = make_leaf(key, strlen(key), 0); @@ -1541,13 +1597,13 @@ void *masstree::get(char *key) fence(); if (p->key(kx_.p) <= lv->fkey[depth]) { if (snapshot_v == p->value(kx_.p)) - p = reinterpret_cast(snapshot_v); + p = reinterpret_cast(ptr_from_off(snapshot_v)); else { goto inter_retry; } } } else { - p = p->leftmost(); + p = reinterpret_cast(ptr_from_off(p->leftmost())); } } @@ -1562,16 +1618,16 @@ void *masstree::get(char *key) snapshot_v = l->value(kx_.p); if (!IS_LV(l->value(kx_.p))) { if (l->key(kx_.p) == lv->fkey[depth] && snapshot_v == l->value(kx_.p)) { - p = reinterpret_cast (snapshot_v); + p = reinterpret_cast (ptr_from_off(snapshot_v)); depth++; goto from_root; } } else { - snapshot_v = &((LV_PTR(l->value(kx_.p)))->value); - if (l->key(kx_.p) == lv->fkey[depth] && (LV_PTR(l->value(kx_.p)))->key_len == lv->key_len - && memcmp((LV_PTR(l->value(kx_.p)))->fkey, lv->fkey, lv->key_len) == 0) { - if (snapshot_v == &((LV_PTR(l->value(kx_.p)))->value)) - return snapshot_v; + snapshot_v = (uint64_t) &(((leafvalue *)ptr_from_off((LV_PTR(l->value(kx_.p)))))->value); + if (l->key(kx_.p) == lv->fkey[depth] && ((leafvalue *)ptr_from_off((LV_PTR(l->value(kx_.p)))))->key_len == lv->key_len + && memcmp(((leafvalue *)ptr_from_off((LV_PTR(l->value(kx_.p)))))->fkey, lv->fkey, lv->key_len) == 0) { + if (snapshot_v == (uint64_t) &(((leafvalue *)ptr_from_off((LV_PTR(l->value(kx_.p)))))->value)) + return (void *) snapshot_v; } else { return NULL; } @@ -1593,13 +1649,13 @@ void *masstree::get(char *key) permuter cp = l->permute(); for (int i = 0; i < cp.size(); i++) { printf("key = %lu\n", l->key(cp[i])); - printf("fkey = %s\n", (char *)((LV_PTR(l->value(cp[i])))->fkey)); + printf("fkey = %s\n", (char *)(((leafvalue *)ptr_from_off((LV_PTR(l->value(cp[i])))))->fkey)); } if (l->next_()) { - cp = 
l->next_()->permute(); + cp = ((leafnode *)ptr_from_off(l->next_()))->permute(); for (int i = 0; i < cp.size(); i++) { - printf("next key = %lu\n", l->next_()->key(cp[i])); + printf("next key = %lu\n", ((leafnode *)ptr_from_off(l->next_()))->key(cp[i])); } } exit(0); @@ -1611,7 +1667,7 @@ void leafnode::get_range(leafvalue * &lv, int num, int &count, leafvalue *buf[], { key_indexed_position kx_; leafnode *next; - void *snapshot_v, *snapshot_n; + uint64_t snapshot_v, snapshot_n; permuter perm; int backup; @@ -1635,13 +1691,13 @@ void leafnode::get_range(leafvalue * &lv, int num, int &count, leafvalue *buf[], fence(); if (p->key(kx_.p) <= lv->fkey[depth]) { if (snapshot_v == p->value(kx_.p)) - p = reinterpret_cast(snapshot_v); + p = reinterpret_cast(ptr_from_off(snapshot_v)); else { goto inter_retry; } } } else { - p = p->leftmost(); + p = reinterpret_cast(ptr_from_off(p->leftmost())); } } @@ -1661,25 +1717,25 @@ void leafnode::get_range(leafvalue * &lv, int num, int &count, leafvalue *buf[], fence(); if (!IS_LV(l->value(perm[i]))) { if (l->key(perm[i]) > lv->fkey[depth] && snapshot_v == l->value(perm[i])) { - p = reinterpret_cast (snapshot_v); + p = reinterpret_cast (ptr_from_off(snapshot_v)); leafvalue *smallest = p->smallest_leaf(lv->key_len, lv->value); p->get_range(smallest, num, count, buf, p, depth + 1); } else if (l->key(perm[i]) == lv->fkey[depth] && snapshot_v == l->value(perm[i])) { - p = reinterpret_cast (snapshot_v); + p = reinterpret_cast (ptr_from_off(snapshot_v)); p->get_range(lv, num, count, buf, p, depth + 1); } } else { snapshot_v = (LV_PTR(snapshot_v)); if (l->key(perm[i]) > lv->fkey[depth]) { if (snapshot_v == (LV_PTR(l->value(perm[i])))) - buf[count++] = reinterpret_cast (snapshot_v); + buf[count++] = reinterpret_cast (ptr_from_off(snapshot_v)); else { count = backup; goto leaf_retry; } - } else if (l->key(perm[i]) == lv->fkey[depth] && memcmp((LV_PTR(l->value(perm[i])))->fkey, lv->fkey, lv->key_len) >= 0) { + } else if (l->key(perm[i]) == lv->fkey[depth] && memcmp(((leafvalue *)ptr_from_off((LV_PTR(l->value(perm[i])))))->fkey, lv->fkey, lv->key_len) >= 0) { if (snapshot_v == (LV_PTR(l->value(perm[i])))) - buf[count++] = reinterpret_cast (snapshot_v); + buf[count++] = reinterpret_cast (ptr_from_off(snapshot_v)); else { count = backup; goto leaf_retry; @@ -1692,21 +1748,21 @@ void leafnode::get_range(leafvalue * &lv, int num, int &count, leafvalue *buf[], count = backup; continue; } else { - if (snapshot_n == NULL) + if (snapshot_n == 0) break; else - l = reinterpret_cast (snapshot_n); + l = reinterpret_cast (ptr_from_off(snapshot_n)); } } } int masstree::scan(char *min, int num, leafvalue *buf[]) { - void *root = this->root_; + void *root = ptr_from_off(this->root_); key_indexed_position kx_; uint32_t depth = 0; leafnode *next; - void *snapshot_v, *snapshot_n; + uint64_t snapshot_v, snapshot_n; permuter perm; int count, backup; @@ -1731,13 +1787,13 @@ int masstree::scan(char *min, int num, leafvalue *buf[]) fence(); if (p->key(kx_.p) <= lv->fkey[depth]) { if (snapshot_v == p->value(kx_.p)) - p = reinterpret_cast(snapshot_v); + p = reinterpret_cast(ptr_from_off(snapshot_v)); else { goto inter_retry; } } } else { - p = p->leftmost(); + p = reinterpret_cast(ptr_from_off(p->leftmost())); } } @@ -1757,25 +1813,25 @@ int masstree::scan(char *min, int num, leafvalue *buf[]) mfence(); if (!IS_LV(l->value(perm[i]))) { if (l->key(perm[i]) > lv->fkey[depth] && snapshot_v == l->value(perm[i])) { - p = reinterpret_cast (snapshot_v); + p = reinterpret_cast (ptr_from_off(snapshot_v)); 
leafvalue *smallest = p->smallest_leaf(lv->key_len, lv->value); p->get_range(smallest, num, count, buf, p, depth + 1); } else if (l->key(perm[i]) == lv->fkey[depth] && snapshot_v == l->value(perm[i])) { - p = reinterpret_cast (snapshot_v); + p = reinterpret_cast (ptr_from_off(snapshot_v)); p->get_range(lv, num, count, buf, p, depth + 1); } } else { snapshot_v = (LV_PTR(snapshot_v)); if (l->key(perm[i]) > lv->fkey[depth]) { if (snapshot_v == (LV_PTR(l->value(perm[i])))) - buf[count++] = reinterpret_cast (snapshot_v); + buf[count++] = reinterpret_cast (ptr_from_off(snapshot_v)); else { count = backup; goto leaf_retry; } - } else if (l->key(perm[i]) == lv->fkey[depth] && memcmp((LV_PTR(l->value(perm[i])))->fkey, lv->fkey, lv->key_len) >= 0) { + } else if (l->key(perm[i]) == lv->fkey[depth] && memcmp(((leafvalue *)ptr_from_off((LV_PTR(l->value(perm[i])))))->fkey, lv->fkey, lv->key_len) >= 0) { if (snapshot_v == (LV_PTR(l->value(perm[i])))) - buf[count++] = reinterpret_cast (snapshot_v); + buf[count++] = reinterpret_cast (ptr_from_off(snapshot_v)); else { count = backup; goto leaf_retry; @@ -1788,10 +1844,10 @@ int masstree::scan(char *min, int num, leafvalue *buf[]) count = backup; continue; } else { - if (snapshot_n == NULL) + if (snapshot_n == 0) break; else - l = reinterpret_cast (snapshot_n); + l = reinterpret_cast (ptr_from_off(snapshot_n)); } } @@ -1800,12 +1856,12 @@ int masstree::scan(char *min, int num, leafvalue *buf[]) int masstree::scan(uint64_t min, int num, uint64_t *buf) { - void *root = this->root_; + void *root = ptr_from_off(this->root_); key_indexed_position kx_; uint32_t depth = 0; leafnode *next; - void *snapshot_v; - leafnode *snapshot_n; + uint64_t snapshot_v; + uint64_t snapshot_n; permuter perm; int count, backup; @@ -1828,13 +1884,13 @@ int masstree::scan(uint64_t min, int num, uint64_t *buf) fence(); if (p->key(kx_.p) <= min) { if (snapshot_v == p->value(kx_.p)) - p = reinterpret_cast(snapshot_v); + p = reinterpret_cast(ptr_from_off(snapshot_v)); else { goto inter_retry; } } } else { - p = p->leftmost(); + p = reinterpret_cast(ptr_from_off(p->leftmost())); } } @@ -1865,10 +1921,10 @@ int masstree::scan(uint64_t min, int num, uint64_t *buf) count = backup; continue; } else { - if (snapshot_n == NULL) + if (snapshot_n == 0) break; else - l = snapshot_n; + l = (leafnode *)ptr_from_off(snapshot_n); } } diff --git a/P-Masstree/masstree.h b/P-Masstree/masstree.h index fae20e72..308cf9be 100644 --- a/P-Masstree/masstree.h +++ b/P-Masstree/masstree.h @@ -13,6 +13,9 @@ #include "tbb/concurrent_vector.h" #endif +#include +#include + namespace masstree { #define LEAF_WIDTH 15 @@ -23,17 +26,17 @@ namespace masstree { #define LV_BITS (1ULL << 0) #define IS_LV(x) ((uintptr_t)x & LV_BITS) -#define LV_PTR(x) (leafvalue*)((void*)((uintptr_t)x & ~LV_BITS)) +#define LV_PTR(x) ((uint64_t)((uintptr_t)x & ~LV_BITS)) #define SET_LV(x) ((void*)((uintptr_t)x | LV_BITS)) class kv { private: uint64_t key; - void *value; + uint64_t value; public: kv() { key = UINT64_MAX; - value = NULL; + value = 0; } friend class leafnode; @@ -57,16 +60,18 @@ typedef struct key_indexed_position { class masstree { private: - void *root_; + uint64_t root_; public: - masstree(); + masstree (); masstree (void *new_root); ~masstree() { } - void *root() {return root_;} + void *operator new(size_t size); + + uint64_t root() {return root_;} void setNewRoot(void *new_root); @@ -319,9 +324,9 @@ class leafnode { private: uint32_t level_; // 4bytes uint32_t version_; // 4bytes - std::mutex *wlock; // 8bytes - leafnode 
*next; // 8bytes - leafnode *leftmost_ptr; // 8bytes + uint64_t wlock; // 8bytes + uint64_t next; // 8bytes + uint64_t leftmost_ptr; // 8bytes uint64_t highest; // 8bytes permuter permutation; // 8bytes uint64_t dummy[2]; // 16bytes @@ -344,7 +349,7 @@ class leafnode { void unlock(); - bool trylock(); + int trylock(); int compare_key(const uint64_t a, const uint64_t b); @@ -386,11 +391,11 @@ class leafnode { uint64_t key(int i) {return entry[i].key;} - void *value(int i) {return entry[i].value;} + uint64_t value(int i) {return entry[i].value;} - leafnode *leftmost() {return leftmost_ptr;} + uint64_t leftmost() {return leftmost_ptr;} - leafnode *next_() {return next;} + uint64_t next_() {return next;} uint64_t highest_() {return highest;} @@ -409,5 +414,12 @@ class leafnode { leafnode *search_for_leftsibling(void *root, uint64_t key, uint32_t level, leafnode *right); }; +// Initialize the persistent memory pool +//POBJ_LAYOUT_BEGIN(p_masstree); +//POBJ_LAYOUT_ROOT(p_masstree, masstree); +//POBJ_LAYOUT_TOID(p_masstree, leafnode); +//POBJ_LAYOUT_TOID(p_masstree, leafvalue); +//POBJ_LAYOUT_END(p_masstree); + } #endif From 9782868d0c27a7c093afd7a064618c7c0f079c13 Mon Sep 17 00:00:00 2001 From: pyrito Date: Fri, 10 Apr 2020 21:29:49 -0500 Subject: [PATCH 20/30] Integrated correct transactions --- P-CLHT/example.cpp | 6 +-- P-CLHT/src/clht_lb_res.c | 109 ++++++++++++++++++++++++--------------- 2 files changed, 69 insertions(+), 46 deletions(-) diff --git a/P-CLHT/example.cpp b/P-CLHT/example.cpp index ac4c0a05..46012bb4 100644 --- a/P-CLHT/example.cpp +++ b/P-CLHT/example.cpp @@ -96,7 +96,7 @@ void run(char **argv) { thread_group[i].join(); auto duration = std::chrono::duration_cast( std::chrono::system_clock::now() - starttime); - printf("Throughput: load, %f ,ops/us\n", (n * 1.0) / duration.count()); + printf("Throughput: load, %f ,ops/s\n", (n * 1.0) / (duration.count()/1000000.0)); } barrier.crossing = 0; @@ -135,9 +135,9 @@ void run(char **argv) { thread_group[i].join(); auto duration = std::chrono::duration_cast( std::chrono::system_clock::now() - starttime); - printf("Throughput: run, %f ,ops/us\n", (n * 1.0) / duration.count()); + printf("Throughput: run, %f ,ops/s\n", (n * 1.0) / (duration.count()/1000000.0)); } - clht_gc_destroy(hashtable); + // clht_gc_destroy(hashtable); delete[] keys; } diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 2e99a3d7..eb3ec038 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -207,13 +207,29 @@ clht_bucket_create() { bucket_t* bucket = NULL; PMEMoid bucket_oid; - if (pmemobj_alloc(pop, &bucket_oid, sizeof(bucket_t), 0, bucket_init, 0)) +#if PMDK_TRANSACTION +TX_BEGIN(pop) { + bucket_oid = pmemobj_tx_alloc(sizeof(bucket_t), TOID_TYPE_NUM(bucket_t)); + bucket = pmemobj_direct(bucket_oid); + bucket->lock = 0; + + uint32_t j; + for (j = 0; j < ENTRIES_PER_BUCKET; j++) + { + bucket->key[j] = 0; + } + bucket->next_off = OID_NULL.off; +} TX_END + #else + if (pmemobj_alloc(pop, &bucket_oid, sizeof(bucket_t), TOID_TYPE_NUM(bucket_t), bucket_init, 0)) { fprintf(stderr, "pmemobj_alloc failed for clht_bucket_create\n"); assert(0); } - // bucket = (bucket_t *) memalign(CACHE_LINE_SIZE, sizeof(bucket_t)); bucket = pmemobj_direct(bucket_oid); +#endif + // bucket = (bucket_t *) memalign(CACHE_LINE_SIZE, sizeof(bucket_t)); + if (bucket == NULL) { return NULL; @@ -261,10 +277,12 @@ clht_t* clht_open() { pool_uuid = my_root.pool_uuid_lo; clht_t* w = pmemobj_direct(my_root); - printf("my_root.off: %d\n", my_root.off); + 
printf("my_root.off: %ld\n", my_root.off); return w; } +// clht_hashtable_t* g_ptr; + clht_t* clht_create(uint64_t num_buckets) { @@ -293,7 +311,7 @@ clht_create(uint64_t num_buckets) pool_uuid = my_root.pool_uuid_lo; clht_t* w = pmemobj_direct(my_root); - printf("my_root.off: %d\n", my_root.off); + printf("my_root.off: %ld\n", my_root.off); if (w == NULL) { @@ -304,20 +322,12 @@ clht_create(uint64_t num_buckets) clht_hashtable_t* ht_ptr; // Transactional allocation -#if PMDK_TRANSACTION - TX_BEGIN(pop) { - ht_ptr= clht_hashtable_create(num_buckets); - printf("clht_create ht_ptr->table.off: %d\n", ht_ptr->table_off); - //pmemobj_tx_abort(-1); - w->ht_off = pmemobj_oid(ht_ptr).off; - } TX_ONABORT { - printf("Failed clht_hashtable_create, rolling back\n"); - } TX_END; -#else - ht_ptr= clht_hashtable_create(num_buckets); - printf("clht_create ht_ptr->table.off: %d\n", ht_ptr->table_off); + ht_ptr = clht_hashtable_create(num_buckets); + // printf("g_ptr after abort: %p\n", g_ptr); + // PMEMoid temp = pmemobj_oid(g_ptr); + // printf("temp.offset: %d, temp.pool: %d\n", temp.off, temp.pool_uuid_lo); + printf("clht_create ht_ptr->table.off: %ld\n", ht_ptr->table_off); w->ht_off = pmemobj_oid(ht_ptr).off; -#endif if (ht_ptr == NULL) { @@ -340,6 +350,7 @@ clht_create(uint64_t num_buckets) return w; } + clht_hashtable_t* clht_hashtable_create(uint64_t num_buckets) { @@ -354,12 +365,20 @@ clht_hashtable_create(uint64_t num_buckets) // hashtable = (clht_hashtable_t*) memalign(CACHE_LINE_SIZE, sizeof(clht_hashtable_t)); // Allocate the table in persistent memory PMEMoid ht_oid; - if (pmemobj_alloc(pop, &ht_oid, sizeof(clht_hashtable_t), 0, 0, 0)) { +#if PMDK_TRANSACTION +TX_BEGIN(pop) { + ht_oid = pmemobj_tx_alloc(sizeof(clht_hashtable_t), TOID_TYPE_NUM(clht_hashtable_t)); +#else + if (pmemobj_alloc(pop, &ht_oid, sizeof(clht_hashtable_t), TOID_TYPE_NUM(clht_hashtable_t), 0, 0)) + { fprintf(stderr, "pmemobj_alloc failed for clht_hashtable_create\n"); assert(0); } +#endif hashtable = pmemobj_direct(ht_oid); - + // g_ptr = hashtable; + // printf("g_ptr: %p\n", g_ptr); + if (hashtable == NULL) { printf("** malloc @ hashtable\n"); @@ -369,13 +388,17 @@ clht_hashtable_create(uint64_t num_buckets) /* hashtable->table = calloc(num_buckets, (sizeof(bucket_t))); */ // hashtable->table = (bucket_t*) memalign(CACHE_LINE_SIZE, num_buckets * (sizeof(bucket_t))); PMEMoid table_oid; - if (pmemobj_alloc(pop, &table_oid, num_buckets * sizeof(bucket_t), 0, 0, 0)) { +#if PMDK_TRANSACTION + table_oid = pmemobj_tx_zalloc(num_buckets * sizeof(bucket_t), TOID_TYPE_NUM(bucket_t)); +#else + if (pmemobj_alloc(pop, &table_oid, num_buckets * sizeof(bucket_t), TOID_TYPE_NUM(bucket_t), 0, 0)) + { fprintf(stderr, "pmemobj_alloc failed for table_oid in clht_hashtable_create\n"); assert(0); } - +#endif hashtable->table_off = table_oid.off; - + bucket_t* bucket_ptr = clht_ptr_from_off(hashtable->table_off); if (bucket_ptr == NULL) @@ -385,7 +408,7 @@ clht_hashtable_create(uint64_t num_buckets) return NULL; } - memset(bucket_ptr, 0, num_buckets * (sizeof(bucket_t))); + //memset(bucket_ptr, 0, num_buckets * (sizeof(bucket_t))); uint64_t i; for (i = 0; i < num_buckets; i++) @@ -397,7 +420,7 @@ clht_hashtable_create(uint64_t num_buckets) bucket_ptr[i].key[j] = 0; } } - + hashtable->num_buckets = num_buckets; hashtable->hash = num_buckets - 1; hashtable->version = 0; @@ -413,6 +436,9 @@ clht_hashtable_create(uint64_t num_buckets) hashtable->is_helper = 1; hashtable->helper_done = 0; +#if PMDK_TRANSACTION +} TX_END +#endif return hashtable; } 
@@ -540,6 +566,8 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) bucket_t* b; #if PMDK_TRANSACTION TX_BEGIN(pop) { + pmemobj_tx_add_range_direct((const void*)&(bucket->next_off), sizeof(uint64_t)); + // printf("made it here at least\n"); b = clht_bucket_create_stats(hashtable, &resize); b->val[0] = val; #ifdef __tile__ @@ -644,6 +672,7 @@ clht_remove(clht_t* h, clht_addr_t key) // May not need this, if there is a crash, remove will not be persisted #if PMDK_TRANSACTION TX_BEGIN(pop) { + pmemobj_tx_add_range_direct((const void*)&(bucket->key[j]), sizeof(clht_addr_t)); bucket->key[j] = 0; clflush((char *)&bucket->key[j], sizeof(uintptr_t), true); } TX_END @@ -676,6 +705,8 @@ clht_put_seq(clht_hashtable_t* hashtable, clht_addr_t key, clht_val_t val, uint6 { #if PMDK_TRANSACTION TX_BEGIN(pop) { + pmemobj_tx_add_range_direct((const void*)&(bucket->key[j]), sizeof(clht_addr_t)); + pmemobj_tx_add_range_direct((const void*)&(bucket->val[j]), sizeof(clht_val_t)); bucket->val[j] = val; bucket->key[j] = key; } TX_END @@ -695,6 +726,8 @@ clht_put_seq(clht_hashtable_t* hashtable, clht_addr_t key, clht_val_t val, uint6 bucket_t* bucket_ptr = clht_ptr_from_off(bucket->next_off); #if PMDK_TRANSACTION TX_BEGIN(pop) { + pmemobj_tx_add_range_direct((const void*)&(bucket->key[0]), sizeof(clht_addr_t)); + pmemobj_tx_add_range_direct((const void*)&(bucket->val[0]), sizeof(clht_val_t)); bucket_ptr->val[0] = val; bucket_ptr->key[0] = key; } TX_ONABORT { @@ -798,6 +831,7 @@ ht_resize_help(clht_hashtable_t* h) int ht_resize_pes(clht_t* h, int is_increase, int by) { + ticks s = getticks(); check_ht_status_steps = CLHT_STATUS_INVOK; @@ -824,7 +858,10 @@ ht_resize_pes(clht_t* h, int is_increase, int by) } printf("// resizing: from %8zu to %8zu buckets\n", ht_old->num_buckets, num_buckets_new); - + +#if PMDK_TRANSACTION + TX_BEGIN(pop) { +#endif clht_hashtable_t* ht_new = clht_hashtable_create(num_buckets_new); ht_new->version = ht_old->version + 1; @@ -921,7 +958,7 @@ ht_resize_pes(clht_t* h, int is_increase, int by) uint64_t ht_new_oid_off = ht_new_oid.off; // uint64_t h_new = (uint64_t)h + sizeof(uint64_t); uint64_t* h_new = &(h->ht_off); - + pmemobj_tx_add_range_direct(&(h->ht_off), sizeof(uint64_t)); //SWAP_U64((uint64_t*) h, (uint64_t) ht_new); SWAP_U64((uint64_t*)h_new, ht_new_oid_off); @@ -985,7 +1022,9 @@ ht_resize_pes(clht_t* h, int is_increase, int by) { ht_status(h, 1, 0); } - +#if PMDK_TRANSACTION + } TX_END +#endif return 1; } @@ -1078,15 +1117,7 @@ ht_status(clht_t* h, int resize_increase, int just_print) { // printf("[STATUS-%02d] #bu: %7zu / #elems: %7zu / full%%: %8.4f%% / expands: %4d / max expands: %2d\n", // clht_gc_get_id(), hashtable->num_buckets, size, full_ratio, expands, expands_max); -#if PMDK_TRANSACTION - TX_BEGIN(pop) { - ht_resize_pes(h, 0, 33); - } TX_ONABORT { - printf("Failed ht_resize_pes, rolling back\n"); - } TX_END -#else ht_resize_pes(h, 0, 33); -#endif } else if ((full_ratio > 0 && full_ratio > CLHT_PERC_FULL_DOUBLE) || expands_max > CLHT_MAX_EXPANSIONS || resize_increase) @@ -1102,15 +1133,7 @@ ht_status(clht_t* h, int resize_increase, int just_print) } DEBUG_PRINT("Callig ht_resize_pes\n"); int ret = 0; -#if PMDK_TRANSACTION - TX_BEGIN(pop) { - ret = ht_resize_pes(h, 1, inc_by_pow2); - } TX_ONABORT { - printf("Failed ht_resize_pes, rolling back\n"); - } TX_END; -#else ret = ht_resize_pes(h, 1, inc_by_pow2); -#endif // return if crashed if (ret == -1) return 0; From 77bc50daa736395649f4cd712189b7cc3127af8d Mon Sep 17 00:00:00 2001 From: Chameleon Cloud User Date: Sun, 12 
Apr 2020 16:25:48 +0000 Subject: [PATCH 21/30] [P-Masstree] Update compile options and minor changes --- P-Masstree/CMakeLists.txt | 11 +---------- P-Masstree/masstree.cpp | 4 ++-- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/P-Masstree/CMakeLists.txt b/P-Masstree/CMakeLists.txt index 36b95dc1..1b1a1039 100644 --- a/P-Masstree/CMakeLists.txt +++ b/P-Masstree/CMakeLists.txt @@ -33,16 +33,7 @@ endif() find_library(JemallocLib jemalloc) find_library(TbbLib tbb) -if(PKG_CONFIG_FOUND) - pkg_check_modules(LIBPMEMOBJ++ REQUIRED libpmemobj++) -else() - find_package(LIBPMEMOBJ++ REQUIRED) -endif() - -link_directories(${LIBPMEMOBJ++_LIBRARY_DIRS}) - set(P_MASS_TEST example.cpp masstree.cpp) add_executable(example ${P_MASS_TEST}) -target_link_libraries(example ${JemallocLib} ${TbbLib} - ${LIBPMEMOBJ++_LIBRARIES} atomic boost_system boost_thread) +target_link_libraries(example ${JemallocLib} ${TbbLib} pmemobj pmem atomic boost_system boost_thread) diff --git a/P-Masstree/masstree.cpp b/P-Masstree/masstree.cpp index f2e6a239..378bc582 100644 --- a/P-Masstree/masstree.cpp +++ b/P-Masstree/masstree.cpp @@ -85,12 +85,12 @@ masstree::masstree () { masstree::masstree (void *new_root) { root_ = pmemobj_oid(new_root).off; - clflush((char *)ptr_from_off(root_), sizeof(leafnode), true); // 304 is the leafnode size of masstree + clflush((char *)ptr_from_off(root_), sizeof(leafnode), true); } void *masstree::operator new(size_t size) { // Open the PMEMpool if it exists, otherwise create it. - size_t pool_size = 2*1024*1024*1024UL; + size_t pool_size = 8*1024*1024*1024UL; if (access("/mnt/pmem/pool", F_OK) != -1) pop = pmemobj_open("/mnt/pmem/pool", POBJ_LAYOUT_NAME(p_masstree)); else From c8182e32cc12bad903c0b601987bcf3cb7dad0c6 Mon Sep 17 00:00:00 2001 From: Sekwon Lee Date: Sun, 12 Apr 2020 16:30:24 +0000 Subject: [PATCH 22/30] Update pmdk document 1. Add system requirements: the huge performance drop in PMDK was observed in old kernel versions (v4.X). After changing the kernel to latest version (v5.3), it becomes performing well. 2. Change the instructions of installing pmdk to use latest-stable branch. --- pmdk.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pmdk.md b/pmdk.md index 06b5bcfd..a666a2b1 100644 --- a/pmdk.md +++ b/pmdk.md @@ -22,12 +22,19 @@ clht_t* w = pmemobj_direct(my_root); **Limitations** Currently, CLHT is the only data structure that has been converted to PMDK. We plan on updating the other data structures in the near future. ## Build & Run +### System requirements +- Ubuntu 18.04.1 LTS +- P-HOT: x86-64 CPU supporting at least the AVX-2 and BMI-2 instruction sets (Haswell and newer) +- Linux kernel: v5.3 or later (The huge performance drop in PMDK was observed on the old kernel versions) +- Compiler: cmake, g++-7, gcc-7, c++17 + ### How to enable PM? 1. Install PMDK -```$ git clone https://github.com/pmem/pmdk.git +$ git clone https://github.com/pmem/pmdk.git $ cd pmdk -$ git checkout tags/1.6 +$ git checkout tags/1.8 $ make -j +$ sudo make install $ cd .. ``` 2. Emulate PM with Ext4-DAX mount @@ -50,4 +57,4 @@ if( access("/mnt/pmem/pool", F_OK ) != -1 ) } ``` -4. Make accordingly and run your code. \ No newline at end of file +4. Make accordingly and run your code. 
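As a side note, the pool setup that the pmdk.md steps above describe (and that `masstree::operator new` performs in the earlier diff) boils down to an open-or-create pattern around `pmemobj_open`/`pmemobj_create` plus a `pmemobj_root` anchor. Below is a minimal standalone sketch in C; the helper name `open_or_create_pool` and the `main` driver are illustrative, while the `/mnt/pmem/pool` path, the `p_masstree` layout name, and the 8 GiB size are taken from the surrounding diffs:

```c
#include <libpmemobj.h>
#include <unistd.h>
#include <stdio.h>
#include <inttypes.h>

/* Reuse the pool file if it already exists, otherwise create it. */
static PMEMobjpool *open_or_create_pool(const char *path, const char *layout,
                                        size_t pool_size)
{
    PMEMobjpool *pool;
    if (access(path, F_OK) == 0)
        pool = pmemobj_open(path, layout);
    else
        pool = pmemobj_create(path, layout, pool_size, 0666);
    if (pool == NULL)
        perror("failed to open or create the pool");
    return pool;
}

int main(void)
{
    /* 8 GiB, matching the size used by masstree::operator new above */
    PMEMobjpool *pool = open_or_create_pool("/mnt/pmem/pool", "p_masstree",
                                            8UL * 1024 * 1024 * 1024);
    if (pool == NULL)
        return 1;

    /* The root object is the crash-safe anchor from which everything else
     * in the pool stays reachable across restarts. */
    PMEMoid root = pmemobj_root(pool, 64);
    printf("root offset: %" PRIu64 "\n", root.off);

    pmemobj_close(pool);
    return 0;
}
```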
From 1ec6380490517c4b0e8966b7f1c5bc88c9ffee68 Mon Sep 17 00:00:00 2001 From: Sekwon Lee Date: Sun, 12 Apr 2020 18:34:07 +0000 Subject: [PATCH 23/30] [P-CLHT] update compile options --- P-CLHT/CMakeLists.txt | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/P-CLHT/CMakeLists.txt b/P-CLHT/CMakeLists.txt index 07b4bfab..2950e814 100644 --- a/P-CLHT/CMakeLists.txt +++ b/P-CLHT/CMakeLists.txt @@ -38,24 +38,16 @@ find_library(JemallocLib jemalloc) find_library(TbbLib tbb) find_package (Threads) -if(PKG_CONFIG_FOUND) - pkg_check_modules(LIBPMEMOBJ++ REQUIRED libpmemobj++) -else() - find_package(LIBPMEMOBJ++ REQUIRED) -endif() - -link_directories(${LIBPMEMOBJ++_LIBRARY_DIRS}) - set(P_CLHT_TEST example.cpp src/clht_lb_res.c src/clht_gc.c external/sspfd/sspfd.c external/ssmem/src/ssmem.c) add_executable(example ${P_CLHT_TEST}) -target_link_libraries(example ${TbbLib} ${JemallocLib} ${LIBPMEMOBJ++_LIBRARIES} boost_system +target_link_libraries(example ${TbbLib} ${JemallocLib} pmemobj pmem boost_system boost_thread pthread) set(P_CLHT_TEST test.cpp src/clht_lb_res.c src/clht_gc.c external/sspfd/sspfd.c external/ssmem/src/ssmem.c) add_executable(test ${P_CLHT_TEST}) -target_link_libraries(test ${TbbLib} ${JemallocLib} ${LIBPMEMOBJ++_LIBRARIES} boost_system +target_link_libraries(test ${TbbLib} ${JemallocLib} pmemobj pmem boost_system boost_thread pthread) From 3d82a49c28ae12c50cf34051f5a612aa8acbddc2 Mon Sep 17 00:00:00 2001 From: pyrito Date: Sun, 12 Apr 2020 14:12:27 -0500 Subject: [PATCH 24/30] Fixed bug in clht_lb_res.c --- P-CLHT/src/clht_lb_res.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index eb3ec038..a0ff0859 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -958,7 +958,9 @@ ht_resize_pes(clht_t* h, int is_increase, int by) uint64_t ht_new_oid_off = ht_new_oid.off; // uint64_t h_new = (uint64_t)h + sizeof(uint64_t); uint64_t* h_new = &(h->ht_off); +#if PMDK_TRANSACTION pmemobj_tx_add_range_direct(&(h->ht_off), sizeof(uint64_t)); +#endif //SWAP_U64((uint64_t*) h, (uint64_t) ht_new); SWAP_U64((uint64_t*)h_new, ht_new_oid_off); From ed00f17af0784d4bb8c8b97ec6916aebf0614c08 Mon Sep 17 00:00:00 2001 From: Sekwon Lee Date: Tue, 14 Apr 2020 05:11:50 +0000 Subject: [PATCH 25/30] :sparkles: add the new feature to reload masstree --- P-Masstree/masstree.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/P-Masstree/masstree.cpp b/P-Masstree/masstree.cpp index 378bc582..be912a76 100644 --- a/P-Masstree/masstree.cpp +++ b/P-Masstree/masstree.cpp @@ -78,9 +78,11 @@ void lock_initialization() #endif masstree::masstree () { - leafnode *init_root = new leafnode(0); - root_ = pmemobj_oid(init_root).off; - clflush((char *)ptr_from_off(root_), sizeof(leafnode), true); + if (root_ == 0) { + leafnode *init_root = new leafnode(0); + root_ = pmemobj_oid(init_root).off; + clflush((char *)ptr_from_off(root_), sizeof(leafnode), true); + } } masstree::masstree (void *new_root) { From 270bb09893d7b8a297655890423fdbd9707aa9bd Mon Sep 17 00:00:00 2001 From: Sekwon Lee Date: Wed, 15 Apr 2020 04:57:46 +0000 Subject: [PATCH 26/30] Exchange free to pmemobj_free --- P-Masstree/Epoche.cpp | 10 ++++++++-- P-Masstree/masstree.cpp | 8 +++++++- P-Masstree/masstree.h | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/P-Masstree/Epoche.cpp b/P-Masstree/Epoche.cpp index a1df89c9..f78e22cf 100644 --- a/P-Masstree/Epoche.cpp +++ b/P-Masstree/Epoche.cpp @@ -8,6 +8,8 @@ #include 
#include "Epoche.h" +#include + using namespace MASS; inline DeletionList::~DeletionList() { @@ -98,12 +100,14 @@ inline void Epoche::exitEpocheAndCleanup(ThreadInfo &epocheInfo) { } LabelDelete *cur = deletionList.head(), *next, *prev = nullptr; + PMEMoid free_objs; while (cur != nullptr) { next = cur->next; if (cur->epoche < oldestEpoche) { for (std::size_t i = 0; i < cur->nodesCount; ++i) { - free(cur->nodes[i]); + free_objs = pmemobj_oid(cur->nodes[i]); + pmemobj_free(&free_objs); } deletionList.remove(cur, prev); } else { @@ -125,12 +129,14 @@ inline Epoche::~Epoche() { } for (auto &d : deletionLists) { LabelDelete *cur = d.head(), *next, *prev = nullptr; + PMEMoid free_objs; while (cur != nullptr) { next = cur->next; assert(cur->epoche < oldestEpoche); for (std::size_t i = 0; i < cur->nodesCount; ++i) { - free(cur->nodes[i]); + free_objs = pmemobj_oid(cur->nodes[i]); + free(&free_objs); } d.remove(cur, prev); cur = next; diff --git a/P-Masstree/masstree.cpp b/P-Masstree/masstree.cpp index 125fbb9d..870c431f 100644 --- a/P-Masstree/masstree.cpp +++ b/P-Masstree/masstree.cpp @@ -156,6 +156,11 @@ leafnode::leafnode(void *left, uint64_t key, void *right, uint32_t level = 1) : #endif } +leafnode::~leafnode() { + PMEMoid free_lock = {pool_uuid, wlock}; + pmemobj_free(&free_lock); +} + void *leafnode::operator new(size_t size) { PMEMoid ret; if (pmemobj_alloc(pop, &ret, size, 0, 0, 0)) { @@ -166,7 +171,8 @@ void *leafnode::operator new(size_t size) { } void leafnode::operator delete(void *addr) { - free(addr); + PMEMoid leaf_oid = pmemobj_oid(addr); + pmemobj_free(&leaf_oid); } void leafnode::lock() {pmemobj_mutex_lock(pop, (PMEMmutex *)ptr_from_off(wlock));} diff --git a/P-Masstree/masstree.h b/P-Masstree/masstree.h index 2b9d416d..0201486c 100644 --- a/P-Masstree/masstree.h +++ b/P-Masstree/masstree.h @@ -343,7 +343,7 @@ class leafnode { leafnode(void *left, uint64_t key, void *right, uint32_t level); - ~leafnode () {delete wlock;} + ~leafnode (); void *operator new(size_t size); From 3514403fc71c4b58e70274d5ad660b0908d9b69e Mon Sep 17 00:00:00 2001 From: pyrito Date: Wed, 15 Apr 2020 23:03:55 -0500 Subject: [PATCH 27/30] Added some previously deleted code --- P-CLHT/src/clht_lb_res.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index a0ff0859..14cd7cea 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -81,7 +81,7 @@ __thread size_t check_ht_status_steps = CLHT_STATUS_INVOK_IN; #endif */ -#define PMDK_TRANSACTION 1 +#define PMDK_TRANSACTION 0 const char* clht_type_desc() @@ -582,6 +582,8 @@ clht_put(clht_t* h, clht_addr_t key, clht_val_t val) } TX_FINALLY { clflush((char *)b, sizeof(bucket_t), true); bucket->next_off = pmemobj_oid(b).off; + bucket_t* next_ptr = clht_ptr_from_off(bucket->next_off); + clflush((char *)&next_ptr, sizeof(uintptr_t), true); } TX_ONABORT { printf("Failed clht_put, rolling back\n"); } TX_END @@ -669,7 +671,6 @@ clht_remove(clht_t* h, clht_addr_t key) if (bucket->key[j] == key) { clht_val_t val = bucket->val[j]; - // May not need this, if there is a crash, remove will not be persisted #if PMDK_TRANSACTION TX_BEGIN(pop) { pmemobj_tx_add_range_direct((const void*)&(bucket->key[j]), sizeof(clht_addr_t)); From 8d7af8a7422e37e32470c96abea830e667ff10c5 Mon Sep 17 00:00:00 2001 From: Sekwon Lee Date: Thu, 16 Apr 2020 05:49:02 +0000 Subject: [PATCH 28/30] Integrate clht_open to clht_create --- P-CLHT/include/clht_lb_res.h | 2 - P-CLHT/src/clht_lb_res.c | 79 
++++++++++++------------------------ 2 files changed, 26 insertions(+), 55 deletions(-) diff --git a/P-CLHT/include/clht_lb_res.h b/P-CLHT/include/clht_lb_res.h index b134eb5a..494dcdb4 100644 --- a/P-CLHT/include/clht_lb_res.h +++ b/P-CLHT/include/clht_lb_res.h @@ -412,8 +412,6 @@ lock_acq_rtm_chk_resize(clht_lock_t* lock, clht_hashtable_t* h) /* Create a new hashtable. */ clht_hashtable_t* clht_hashtable_create(uint64_t num_buckets); clht_t* clht_create(uint64_t num_buckets); -clht_t* clht_open(); - /* Insert a key-value pair into a hashtable. */ int clht_put(clht_t* hashtable, clht_addr_t key, clht_val_t val); diff --git a/P-CLHT/src/clht_lb_res.c b/P-CLHT/src/clht_lb_res.c index 14cd7cea..0db89d7b 100644 --- a/P-CLHT/src/clht_lb_res.c +++ b/P-CLHT/src/clht_lb_res.c @@ -253,34 +253,6 @@ clht_bucket_create_stats(clht_hashtable_t* h, int* resize) clht_hashtable_t* clht_hashtable_create(uint64_t num_buckets); -clht_t* clht_open() { - size_t pool_size = 2*1024*1024*1024UL; - if( access("/mnt/pmem/pool", F_OK ) != -1 ) - { - pop = pmemobj_open("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht)); - } else - { - perror("Pool does not already exist\n"); - } - - if (pop == NULL) - { - perror("failed to open the pool\n"); - } - - // Create the root pointer - PMEMoid my_root = pmemobj_root(pop, sizeof(clht_t)); - if (pmemobj_direct(my_root) == NULL) - { - perror("root pointer is null\n"); - } - pool_uuid = my_root.pool_uuid_lo; - - clht_t* w = pmemobj_direct(my_root); - printf("my_root.off: %ld\n", my_root.off); - return w; -} - // clht_hashtable_t* g_ptr; clht_t* @@ -319,38 +291,39 @@ clht_create(uint64_t num_buckets) return NULL; } - clht_hashtable_t* ht_ptr; + if (w->ht_off == 0) { + clht_hashtable_t* ht_ptr; - // Transactional allocation - ht_ptr = clht_hashtable_create(num_buckets); - // printf("g_ptr after abort: %p\n", g_ptr); - // PMEMoid temp = pmemobj_oid(g_ptr); - // printf("temp.offset: %d, temp.pool: %d\n", temp.off, temp.pool_uuid_lo); - printf("clht_create ht_ptr->table.off: %ld\n", ht_ptr->table_off); - w->ht_off = pmemobj_oid(ht_ptr).off; + // Transactional allocation + ht_ptr = clht_hashtable_create(num_buckets); + // printf("g_ptr after abort: %p\n", g_ptr); + // PMEMoid temp = pmemobj_oid(g_ptr); + // printf("temp.offset: %d, temp.pool: %d\n", temp.off, temp.pool_uuid_lo); + printf("clht_create ht_ptr->table.off: %ld\n", ht_ptr->table_off); + w->ht_off = pmemobj_oid(ht_ptr).off; - if (ht_ptr == NULL) - { - free(w); - return NULL; - } - - w->resize_lock = LOCK_FREE; - w->gc_lock = LOCK_FREE; - w->status_lock = LOCK_FREE; - w->version_list = NULL; - w->version_min = 0; - w->ht_oldest = ht_ptr; + if (ht_ptr == NULL) + { + free(w); + return NULL; + } - // This should flush everything to persistent memory - clflush((char *)clht_ptr_from_off(ht_ptr->table_off), num_buckets * sizeof(bucket_t), true); - clflush((char *)ht_ptr, sizeof(clht_hashtable_t), true); - clflush((char *)w, sizeof(clht_t), true); + w->resize_lock = LOCK_FREE; + w->gc_lock = LOCK_FREE; + w->status_lock = LOCK_FREE; + w->version_list = NULL; + w->version_min = 0; + w->ht_oldest = ht_ptr; + + // This should flush everything to persistent memory + clflush((char *)clht_ptr_from_off(ht_ptr->table_off), num_buckets * sizeof(bucket_t), true); + clflush((char *)ht_ptr, sizeof(clht_hashtable_t), true); + clflush((char *)w, sizeof(clht_t), true); + } return w; } - clht_hashtable_t* clht_hashtable_create(uint64_t num_buckets) { From d495e35efbd45bfec1fb3af3ffcab6fe3e33f894 Mon Sep 17 00:00:00 2001 From: Sekwon Lee Date: Thu, 16 Apr 
2020 06:33:32 +0000 Subject: [PATCH 29/30] Remove comments related to DIMM --- P-Masstree/masstree.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/P-Masstree/masstree.cpp b/P-Masstree/masstree.cpp index 870c431f..eb858e70 100644 --- a/P-Masstree/masstree.cpp +++ b/P-Masstree/masstree.cpp @@ -869,13 +869,11 @@ void leafnode::assign(int p, const uint64_t& key, void *value) { entry[p].key = key; fence(); - // TODO: Need to add an identifier about which DIMM includes this value entry[p].value = (uint64_t) value; } void leafnode::assign_value(int p, void *value) { - // TODO: Need to add identifier about which DIMM includes this value entry[p].value = (uint64_t) value; clflush((char *)&entry[p].value, sizeof(uint64_t), true); } From 03af92d5fa9182cac18e3a9983a2329cc54fd6f8 Mon Sep 17 00:00:00 2001 From: pyrito Date: Fri, 17 Apr 2020 21:18:27 -0500 Subject: [PATCH 30/30] Modified README for P-CLHT --- P-CLHT/README.md | 57 +++++++++++++++++++++++++++++++++++---- P-CLHT/pmdk.md | 70 ------------------------------------------------ 2 files changed, 52 insertions(+), 75 deletions(-) delete mode 100644 P-CLHT/pmdk.md diff --git a/P-CLHT/README.md b/P-CLHT/README.md index ad788e60..009a2ee8 100644 --- a/P-CLHT/README.md +++ b/P-CLHT/README.md @@ -1,11 +1,10 @@ -## P-CLHT: Persistent Cache-Line Hash Table +## P-CLHT: Persistent Cache-Line Hash Table - PMDK -`P-CLHT` is a crash consistent version of [Cache-Line Hash Table](https://dl.acm.org/citation.cfm?id=2694359) (CLHT). +`P-CLHT` is a recoverable and crash-consistent version of [Cache-Line Hash Table](https://dl.acm.org/citation.cfm?id=2694359) (CLHT). CLHT is a cache-friendly hash table which restricts each bucket to be of the size of a cache line. CLHT is an unordered index only supporting point queries. -**Conversion**. `CLHT-LB` using lock-based writes for concurrency is converted into `P-CLHT` by adding cache -line flushes and memory fences after each critical volatile store. +**Conversion**. `CLHT-LB` using lock-based writes for concurrency is converted into `P-CLHT` by adding cache line flushes and memory fences after each critical volatile store. **Performance**. Compared with [CCEH](https://www.usenix.org/conference/fast19/presentation/nam) that is a state-of-the-art unordered index, `P-CLHT` shows **2.38x**, **1.35x**, and **1.25x** better performance in @@ -16,7 +15,55 @@ YCSB workload A, B, C respectively using random integer keys while **0.37x** wor **Use Case**. `P-CLHT` provides the superior performance of insertion and point lookup, even if not supporting range scans. Therefore, it would be appropriate to be used for the applications only consisting of point queries. +This branch of P-CLHT also uses PMDK to ensure the persistence and recoverability of the cache-line hash table. All other details of this data structure are the same (cache line flushing, alignment, etc) except for the backend library used to ensure persistence. + +**Motivation** The published implementation does not have a way of recovering permanent memory leaks during a crash. The PMDK library, specifically `libpmemobj`, gives us useful internal structures such as `pmemobj_root`, which is a stored offset within the persistent memory pool that can be used to recover any data that was left in a partial state, etc. + +**How We Used PMDK** The entire conversion required us to replace any data structure pointers to point to the persistent memory pool using the non-transactional, atomic allocation functions such as `pmemobj_alloc`. 
Since the `PMEMoid` structs (which store the pool offset and id) were 16 bytes, some code manipulation was required to ensure the cache-line alignment of the data structure. Finally, transactions were used for major hashtable operations such as insertion, resizing, and deletion. This part is still being tested and is a work-in-progress. If you look through the code and compare it with the `master` branch, you can see that the changes follow a logical pattern, and the modifications are relatively minor. + +**How to test recoverability?** The best way to recover your hashtable is following the paradigm presented in `clht_open` where all the user has to do is use `pmemobj_root` to recover the root (a clht_t object basically) of the persistent memory pool. Please make sure that you are opening the same pool with the correct pool layout! +``` +... +PMEMoid my_root = pmemobj_root(pop, sizeof(clht_t)); +if (pmemobj_direct(my_root) == NULL) +{ + perror("root pointer is null\n"); +} +... +clht_t* w = pmemobj_direct(my_root); +... +``` + ## Build & Run +### How to enable PM? +1. Install PMDK +```$ git clone https://github.com/pmem/pmdk.git +$ cd pmdk +$ git checkout tags/1.6 +$ make -j +$ cd .. +``` +2. Emulate PM with Ext4-DAX mount +```$ sudo mkfs.ext4 -b 4096 -E stride=512 -F /dev/pmem0 +$ sudo mount -o dax /dev/pmem0 /mnt/pmem +``` + +3. Set pool_size and pool name appropriately using `pmemobj_create`. For example: +``` +// Size of the memory pool +size_t pool_size = 2*1024*1024*1024UL; +if( access("/mnt/pmem/pool", F_OK ) != -1 ) +{ + // If the pool already exists, open it + pop = pmemobj_open("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht)); +} else +{ + // If the pool does not exist, create it + pop = pmemobj_create("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht), pool_size, 0666); +} +``` + +4. Make accordingly and run the example. #### Build @@ -35,4 +82,4 @@ $ ./example 10000 4 usage: ./example [n] [nthreads] n: number of keys (integer) nthreads: number of threads (integer) -``` +``` \ No newline at end of file diff --git a/P-CLHT/pmdk.md b/P-CLHT/pmdk.md deleted file mode 100644 index a5a056b6..00000000 --- a/P-CLHT/pmdk.md +++ /dev/null @@ -1,70 +0,0 @@ -## P-CLHT: Persistent Cache-Line Hash Table - PMDK - -This branch of P-CLHT uses PMDK to ensure the persistence and recoverability of the persistent cache-line hash table. All other details of this data structure are the same (cache line flushing, alignment, etc) except for the backend library used to ensure persistence. - -**Motivation** The current implementation does not have a way of recovering permanent memory leaks during a crash. The PMDK library, specifically `libpmemobj`, gives us useful internal structures such as `pmemobj_root`, which is a stored offset within the persistent memory pool that can be used to recover any data that was left in a partial state, etc. - -**How We Used PMDK** The entire conversion required us to replace any data structure pointers to point to the persistent memory pool using the non-transactional, atomic allocation functions such as `pmemobj_alloc`. Since the `PMEMoid` structs (which store the pool offset and id) were 16 bytes, some code manipulation was required to ensure the cache-line alignment of the data structure. Finally, transactions were used for major hashtable operations such as insertion, resizing, and deletion. This part is still being tested and is a work-in-progress. 
If you look through the code and compare it with the `master` branch, you can see that the changes follow a logical pattern, and the modifications are relatively minor. - -**How to test recoverability?** The best way to recover your hashtable is following the paradigm presented in `clht_open` where all the user has to do is use `pmemobj_root` to recover the root (a clht_t object basically) of the persistent memory pool. Please make sure that you are opening the same pool with the correct pool layout! -``` -... -PMEMoid my_root = pmemobj_root(pop, sizeof(clht_t)); -if (pmemobj_direct(my_root) == NULL) -{ - perror("root pointer is null\n"); -} -... -clht_t* w = pmemobj_direct(my_root); -... -``` - -## Build & Run -### How to enable PM? -1. Install PMDK -```$ git clone https://github.com/pmem/pmdk.git -$ cd pmdk -$ git checkout tags/1.6 -$ make -j -$ cd .. -``` -2. Emulate PM with Ext4-DAX mount -```$ sudo mkfs.ext4 -b 4096 -E stride=512 -F /dev/pmem0 -$ sudo mount -o dax /dev/pmem0 /mnt/pmem -``` - -3. Set pool_size and pool name appropriately using `pmemobj_open` or `pmemobj_create`. For example: -``` -// Size of the memory pool -size_t pool_size = 2*1024*1024*1024UL; -if( access("/mnt/pmem/pool", F_OK ) != -1 ) -{ - // If the pool already exists, open it - pop = pmemobj_open("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht)); -} else -{ - // If the pool does not exist, create it - pop = pmemobj_create("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht), pool_size, 0666); -} -``` - -4. Make accordingly and run the example. - -#### Build - -``` -$ mkdir build -$ cd build -$ cmake .. -$ make -j -``` - -#### Run - -``` -$ ./example 10000 4 - -usage: ./example [n] [nthreads] -n: number of keys (integer) -nthreads: number of threads (integer) -```
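Taken together, the open-or-create and root-recovery snippets that patch 30 moves into the README amount to the standalone sketch below. It assumes the same pool path, pool size, and layout name shown in the README; `struct clht_root`, its `ht_off` field, and the `main()` scaffolding are illustrative stand-ins rather than code taken verbatim from the repository, and error handling is kept to the minimum.

```
/* Minimal sketch: open the pool if it exists, create it otherwise,
 * then recover (or lazily allocate) the root object.
 * Pool path, size, and layout name follow the README above. */
#include <libpmemobj.h>
#include <unistd.h>
#include <stdio.h>
#include <stdint.h>

POBJ_LAYOUT_BEGIN(clht);
POBJ_LAYOUT_ROOT(clht, struct clht_root);
POBJ_LAYOUT_END(clht);

struct clht_root {       /* hypothetical stand-in for clht_t */
    uint64_t ht_off;     /* pool offset of the current hash table; 0 when unset */
};

int main(void)
{
    const char *path = "/mnt/pmem/pool";
    size_t pool_size = 2*1024*1024*1024UL;
    PMEMobjpool *pop;

    if (access(path, F_OK) != -1)
        pop = pmemobj_open(path, POBJ_LAYOUT_NAME(clht));   /* reopen after a restart or crash */
    else
        pop = pmemobj_create(path, POBJ_LAYOUT_NAME(clht), pool_size, 0666);
    if (pop == NULL) {
        perror("pmemobj_open/pmemobj_create");
        return 1;
    }

    /* The root object is allocated and zero-filled the first time it is
     * requested, so ht_off == 0 distinguishes a fresh pool from one that
     * already holds a table. */
    PMEMoid my_root = pmemobj_root(pop, sizeof(struct clht_root));
    struct clht_root *w = pmemobj_direct(my_root);
    if (w == NULL) {
        perror("root pointer is null");
        return 1;
    }
    if (w->ht_off == 0)
        printf("fresh pool: build a new hash table\n");
    else
        printf("existing pool: reuse table at offset %lu\n", (unsigned long)w->ht_off);

    pmemobj_close(pop);
    return 0;
}
```

Opening the pool before touching any persistent data is also what gives the `if (root_ == 0)` check added in patch 25 and the `if (w->ht_off == 0)` check added in patch 28 their meaning: a reopened pool keeps its old root contents, so the constructors only build fresh structures when those offsets are still zero.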
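The commits that replace `free()` with `pmemobj_free()` (patch 26) follow one pattern throughout: allocate from the pool with the non-transactional `pmemobj_alloc`, hand out a direct pointer for normal use, and convert that pointer back to a `PMEMoid` when releasing it. A condensed sketch of that pattern follows; `pop` is assumed to be the already-opened global pool, and `struct node` is a hypothetical stand-in for `bucket_t` or `leafnode`.

```
/* Allocation pattern used throughout the series: pool-backed allocation,
 * direct pointers for normal use, PMEMoid recovery on free.
 * `pop` is assumed to be the global pool; `struct node` is illustrative. */
#include <libpmemobj.h>
#include <stdio.h>
#include <stdint.h>

extern PMEMobjpool *pop;

struct node {
    uint64_t key;
    uint64_t val;
};

struct node *node_new(void)
{
    PMEMoid oid;
    /* non-transactional, atomic allocation: type_num 0, no constructor */
    if (pmemobj_alloc(pop, &oid, sizeof(struct node), 0, NULL, NULL)) {
        fprintf(stderr, "pmemobj_alloc failed\n");
        return NULL;
    }
    return pmemobj_direct(oid);
}

void node_delete(struct node *n)
{
    /* recover the PMEMoid from the direct pointer, then release it */
    PMEMoid oid = pmemobj_oid(n);
    pmemobj_free(&oid);
}
```

The same conversion is what `leafnode::operator new`/`operator delete` and the Epoche deletion-list cleanup in patch 26 perform, only spread across the C++ allocation operators and the epoch-based reclamation loop.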
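Patch 24 guards the undo-log call in `ht_resize_pes` behind the `PMDK_TRANSACTION` flag, and patch 27 turns that flag off, leaving the flush-based path as the default. With the flag on, the destination word is added to the transaction's undo log before being overwritten; with it off, the store is simply made durable with an explicit flush. The sketch below shows that guard in isolation, under stated assumptions: `pop` is the global pool, `clflush()` matches the repository's flush helper, and `swap_table_offset()` is a hypothetical wrapper around the pointer swap rather than an existing function.

```
/* Guarded update pattern from patches 24 and 27: undo-log the 8-byte
 * destination inside a transaction, or fall back to store plus flush.
 * `pop`, `clflush()`, and `swap_table_offset()` are assumptions for this
 * sketch, not code taken verbatim from the repository. */
#include <libpmemobj.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PMDK_TRANSACTION 0   /* matches the setting after patch 27 */

extern PMEMobjpool *pop;
extern void clflush(char *data, int len, bool fence);

static void swap_table_offset(uint64_t *ht_off, uint64_t new_off)
{
#if PMDK_TRANSACTION
    TX_BEGIN(pop) {
        /* log the destination word so an abort rolls the swap back */
        pmemobj_tx_add_range_direct(ht_off, sizeof(uint64_t));
        *ht_off = new_off;
    } TX_ONABORT {
        printf("table swap aborted, rolling back\n");
    } TX_END
#else
    /* the repository performs this store with SWAP_U64 for an atomic
     * 8-byte exchange; a plain store stands in for it here */
    *ht_off = new_off;
    clflush((char *)ht_off, sizeof(uint64_t), true);
#endif
}
```

Keeping the switch at compile time means the flush-only path, the one currently exercised, carries no transaction overhead, while the logged path can be re-enabled when testing recoverability of resizes.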