From df29cb6e2df915d88a802e28dcdade6d6c7dbc12 Mon Sep 17 00:00:00 2001 From: Til Date: Wed, 16 Feb 2022 17:30:58 +0100 Subject: [PATCH 01/16] Improve build experience --- .gitignore | 1 + CHANGELOG.md | 3 ++- WORKSPACE | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 55098c94..f54c79a9 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ compile_commands.json perf.data* build +/cmake-build-debug/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fd2a904..8c67c830 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] -Nothing yet. +### Changed +- Build improvements for bazel/cmake ## [1.1.1] - 2022-01-30 ### Changed diff --git a/WORKSPACE b/WORKSPACE index 0bd3d32b..be61fc70 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -11,7 +11,7 @@ http_archive( load("@bazel_skylib//lib:versions.bzl", "versions") versions.check( - minimum_bazel_version = "4.2.2", + minimum_bazel_version = "3.0.0", maximum_bazel_version = "4.2.2", ) From 4f6bfdda1adf712b4f765222dcdb493b8a2bafbe Mon Sep 17 00:00:00 2001 From: Tilmann Date: Wed, 16 Feb 2022 17:33:16 +0100 Subject: [PATCH 02/16] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fad24140..06dc22b6 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -**Note: for updates please also check the [fork](https://github.com/tzaeschke/phtree-cpp) by the original PH-Tree developer.** +**This is a fork of [Improbable's PH-tree](https://github.com/improbable-eng/phtree-cpp)**. # PH-Tree C++ From 291d96023d654c0aa0a8c1b223cb1e8d51e7ce79 Mon Sep 17 00:00:00 2001 From: Tilmann Date: Tue, 22 Feb 2022 17:23:43 +0100 Subject: [PATCH 03/16] Fix for each postlen / #2 (#3) --- CHANGELOG.md | 1 + README.md | 4 ++-- phtree/phtree_test.cc | 26 ++++++++++++++++++++++++++ phtree/v16/for_each.h | 10 +++++----- 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c67c830..190d228e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Changed +- Fixed issue #2: for_each(callback, filter) was traversing too many nodes. - Build improvements for bazel/cmake ## [1.1.1] - 2022-01-30 diff --git a/README.md b/README.md index 06dc22b6..17ab0a47 100644 --- a/README.md +++ b/README.md @@ -128,9 +128,9 @@ tree.estimate_count(query); #### Queries -* For-each over all elements: `tree.fore_each(callback);` +* For-each over all elements: `tree.for_each(callback);` * Iterator over all elements: `auto iterator = tree.begin();` -* For-each with box shaped window queries: `tree.fore_each(PhBoxD(min, max), callback);` +* For-each with box shaped window queries: `tree.for_each(PhBoxD(min, max), callback);` * Iterator for box shaped window queries: `auto q = tree.begin_query(PhBoxD(min, max));` * Iterator for _k_ nearest neighbor queries: `auto q = tree.begin_knn_query(k, center_point, distance_function);` * Custom query shapes, such as spheres: `tree.for_each(callback, FilterSphere(center, radius, tree.converter()));` diff --git a/phtree/phtree_test.cc b/phtree/phtree_test.cc index fe323c39..8bf4b423 100644 --- a/phtree/phtree_test.cc +++ b/phtree/phtree_test.cc @@ -717,6 +717,32 @@ TEST(PhTreeTest, TestWindowQuery1) { ASSERT_EQ(N, n); } +TEST(PhTreeTest, TestWindowQuery1_WithFilter) { + size_t N = 1000; + const dimension_t dim = 3; + TestTree tree; + std::vector> points; + populate(tree, points, N); + + struct Counter { + void operator()(TestPoint, Id& t) { + ++n_; + id_ = t; + } + Id id_{}; + size_t n_ = 0; + }; + + for (size_t i = 0; i < N; i++) { + TestPoint& p = points.at(i); + Counter callback{}; + FilterAABB filter(p, p, tree.converter()); + tree.for_each(callback, filter); + ASSERT_EQ(i, callback.id_._i); + ASSERT_EQ(1, callback.n_); + } +} + TEST(PhTreeTest, TestWindowQueryMany) { const dimension_t dim = 3; TestPoint min{-100, -100, -100}; diff --git a/phtree/v16/for_each.h b/phtree/v16/for_each.h index aee3d157..d624f099 100644 --- a/phtree/v16/for_each.h +++ b/phtree/v16/for_each.h @@ -41,11 +41,11 @@ class ForEach { void run(const EntryT& root) { assert(root.IsNode()); - TraverseNode(root.GetKey(), root.GetNode()); + TraverseNode(root.GetNode()); } private: - void TraverseNode(const KeyInternal& key, const NodeT& node) { + void TraverseNode(const NodeT& node) { auto iter = node.Entries().begin(); auto end = node.Entries().end(); for (; iter != end; ++iter) { @@ -53,12 +53,12 @@ class ForEach { const auto& child_key = child.GetKey(); if (child.IsNode()) { const auto& child_node = child.GetNode(); - if (filter_.IsNodeValid(key, node.GetPostfixLen() + 1)) { - TraverseNode(child_key, child_node); + if (filter_.IsNodeValid(child_key, child_node.GetPostfixLen() + 1)) { + TraverseNode(child_node); } } else { T& value = child.GetValue(); - if (filter_.IsEntryValid(key, value)) { + if (filter_.IsEntryValid(child_key, value)) { callback_(converter_.post(child_key), value); } } From 18cbb711ccd41911bf6e9304ba9c12f2188f8b51 Mon Sep 17 00:00:00 2001 From: Tilmann Date: Wed, 2 Mar 2022 11:51:32 +0100 Subject: [PATCH 04/16] Fix/issue 4 fix key copy (#6) --- CHANGELOG.md | 3 ++- phtree/v16/node.h | 15 +++++++-------- phtree/v16/phtree_v16.h | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 190d228e..6c9ccb97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Changed -- Fixed issue #2: for_each(callback, filter) was traversing too many nodes. +- Avoid unnecessary key copy when inserting a node. [#4](https://github.com/tzaeschke/phtree-cpp/issues/4) +- for_each(callback, filter) was traversing too many nodes. [#2](https://github.com/tzaeschke/phtree-cpp/issues/2) - Build improvements for bazel/cmake ## [1.1.1] - 2022-01-30 diff --git a/phtree/v16/node.h b/phtree/v16/node.h index 6994bca0..318a1d0f 100644 --- a/phtree/v16/node.h +++ b/phtree/v16/node.h @@ -164,14 +164,14 @@ class Node { * @param args Constructor arguments for creating a value T that can be inserted for the key. */ template - EntryT* Emplace(bool& is_inserted, const KeyT& key, Args&&... args) { + EntryT& Emplace(bool& is_inserted, const KeyT& key, Args&&... args) { hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); auto emplace_result = entries_.try_emplace(hc_pos, key, std::forward(args)...); auto& entry = emplace_result.first->second; // Return if emplace succeed, i.e. there was no entry. if (emplace_result.second) { is_inserted = true; - return &entry; + return entry; } return HandleCollision(entry, is_inserted, key, std::forward(args)...); } @@ -311,7 +311,7 @@ class Node { * an entry with the exact same key as new_key, so insertion has failed. */ template - auto* HandleCollision( + auto& HandleCollision( EntryT& existing_entry, bool& is_inserted, const KeyT& new_key, Args&&... args) { assert(!is_inserted); // We have two entries in the same location (local pos). @@ -339,23 +339,22 @@ class Node { } // perfect match -> return existing } - return &existing_entry; + return existing_entry; } template - auto* InsertSplit( + auto& InsertSplit( EntryT& current_entry, const KeyT& new_key, bit_width_t max_conflicting_bits, Args&&... args) { - const auto current_key = current_entry.GetKey(); // determine length of infix bit_width_t new_local_infix_len = GetPostfixLen() - max_conflicting_bits; bit_width_t new_postfix_len = max_conflicting_bits - 1; auto new_sub_node = std::make_unique(new_local_infix_len, new_postfix_len); hc_pos_t pos_sub_1 = CalcPosInArray(new_key, new_postfix_len); - hc_pos_t pos_sub_2 = CalcPosInArray(current_key, new_postfix_len); + hc_pos_t pos_sub_2 = CalcPosInArray(current_entry.GetKey(), new_postfix_len); // Move key/value into subnode new_sub_node->WriteEntry(pos_sub_2, current_entry); @@ -363,7 +362,7 @@ class Node { // Insert new node into local node current_entry.SetNode(std::move(new_sub_node)); - return &new_entry; + return new_entry; } /* diff --git a/phtree/v16/phtree_v16.h b/phtree/v16/phtree_v16.h index 103b7870..e2b37aaf 100644 --- a/phtree/v16/phtree_v16.h +++ b/phtree/v16/phtree_v16.h @@ -95,7 +95,7 @@ class PhTreeV16 { bool is_inserted = false; while (current_entry->IsNode()) { current_entry = - current_entry->GetNode().Emplace(is_inserted, key, std::forward(args)...); + ¤t_entry->GetNode().Emplace(is_inserted, key, std::forward(args)...); } num_entries_ += is_inserted; return {current_entry->GetValue(), is_inserted}; @@ -145,7 +145,7 @@ class PhTreeV16 { bool is_inserted = false; while (current_entry->IsNode()) { current_entry = - current_entry->GetNode().Emplace(is_inserted, key, std::forward(args)...); + ¤t_entry->GetNode().Emplace(is_inserted, key, std::forward(args)...); } num_entries_ += is_inserted; return {current_entry->GetValue(), is_inserted}; From ced5e27a455e8c660632d90f57f2db738502a58a Mon Sep 17 00:00:00 2001 From: Tilmann Date: Wed, 2 Mar 2022 12:58:32 +0100 Subject: [PATCH 05/16] Avoid find on remove node (#7) --- CHANGELOG.md | 1 + phtree/v16/iterator_base.h | 4 ++-- phtree/v16/node.h | 14 ++++++-------- phtree/v16/phtree_v16.h | 29 +++++++++++++++-------------- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c9ccb97..d396d912 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Changed +- Avoid unnecessary find() when removing a node. [#5](https://github.com/tzaeschke/phtree-cpp/issues/5) - Avoid unnecessary key copy when inserting a node. [#4](https://github.com/tzaeschke/phtree-cpp/issues/4) - for_each(callback, filter) was traversing too many nodes. [#2](https://github.com/tzaeschke/phtree-cpp/issues/2) - Build improvements for bazel/cmake diff --git a/phtree/v16/iterator_base.h b/phtree/v16/iterator_base.h index 50ac8708..b8c68486 100644 --- a/phtree/v16/iterator_base.h +++ b/phtree/v16/iterator_base.h @@ -135,8 +135,8 @@ class IteratorBase { * The parent entry contains the parent node. The parent node is the node ABOVE the current node * which contains the current entry. */ - const EntryT* GetCurrentNodeEntry() const { - return current_node_; + EntryT* GetCurrentNodeEntry() const { + return const_cast(current_node_); } const EntryT* GetParentNodeEntry() const { diff --git a/phtree/v16/node.h b/phtree/v16/node.h index 318a1d0f..24be9849 100644 --- a/phtree/v16/node.h +++ b/phtree/v16/node.h @@ -63,17 +63,15 @@ namespace { * @param parent_node Current owner of the child node. */ template -void MergeIntoParent(Node& child_node, Node& parent) { +void MergeIntoParent(Node& child_node, Entry& parent_entry) { assert(child_node.GetEntryCount() == 1); + assert(&parent_entry.GetNode() == &child_node); // At this point we have found an entry that needs to be removed. We also know that we need to // remove the child node because it contains at most one other entry and it is not the root // node. auto map_entry = child_node.Entries().begin(); auto& entry = map_entry->second; - auto hc_pos_in_parent = CalcPosInArray(entry.GetKey(), parent.GetPostfixLen()); - auto& parent_entry = parent.Entries().find(hc_pos_in_parent)->second; - if (entry.IsNode()) { // connect sub to parent auto& sub2 = entry.GetNode(); @@ -202,18 +200,18 @@ class Node { * @param found This is and output parameter and will be set to 'true' if a value was removed. * @return A child node if the provided key leads to a child node. */ - Node* Erase(const KeyT& key, Node* parent, bool& found) { + EntryT* Erase(const KeyT& key, EntryT* parent_entry, bool& found) { hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); auto it = entries_.find(hc_pos); if (it != entries_.end() && DoesEntryMatch(it->second, key)) { if (it->second.IsNode()) { - return &it->second.GetNode(); + return &it->second; } entries_.erase(it); found = true; - if (parent && GetEntryCount() == 1) { - MergeIntoParent(*this, *parent); + if (parent_entry != nullptr && GetEntryCount() == 1) { + MergeIntoParent(*this, *parent_entry); // WARNING: (this) is deleted here, do not refer to it beyond this point. } } diff --git a/phtree/v16/phtree_v16.h b/phtree/v16/phtree_v16.h index e2b37aaf..34c134b3 100644 --- a/phtree/v16/phtree_v16.h +++ b/phtree/v16/phtree_v16.h @@ -57,7 +57,6 @@ class PhTreeV16 { using ScalarExternal = typename CONVERT::ScalarExternal; using ScalarInternal = typename CONVERT::ScalarInternal; using KeyT = typename CONVERT::KeyInternal; - using NodeT = Node; using EntryT = Entry; public: @@ -216,13 +215,16 @@ class PhTreeV16 { * @return '1' if a value was found, otherwise '0'. */ size_t erase(const KeyT& key) { - auto* current_node = &root_.GetNode(); - NodeT* parent_node = nullptr; + auto* current_entry = &root_; + // We do not pass in the root entry as parent of a node because we do not want the + // root entry to be modified. The reason is simply that a lot of the code in this class + // becomes a lot simpler if we can assume the root entry to contain a node. + EntryT* non_root_current_entry = nullptr; bool found = false; - while (current_node) { - auto* child_node = current_node->Erase(key, parent_node, found); - parent_node = current_node; - current_node = child_node; + while (current_entry) { + auto* child_entry = current_entry->GetNode().Erase(key, non_root_current_entry, found); + current_entry = child_entry; + non_root_current_entry = child_entry; } num_entries_ -= found; return found; @@ -245,18 +247,17 @@ class PhTreeV16 { if (iterator.Finished()) { return 0; } - if (!iterator.GetParentNodeEntry()) { - // Why may there be no parent? - // - we are in the root node - // - the iterator did not set this value - // In either case, we need to start searching from the top. - return erase(iterator.GetCurrentResult()->GetKey()); + if (!iterator.GetCurrentNodeEntry() || iterator.GetCurrentNodeEntry() == &root_) { + // There may be no entry because not every iterator sets it. + // Also, do _not_ use the root entry, see erase(key). + // Start searching from the top. + return erase(iterator.GetCurrentResult()->GetKey()); } bool found = false; assert(iterator.GetCurrentNodeEntry() && iterator.GetCurrentNodeEntry()->IsNode()); iterator.GetCurrentNodeEntry()->GetNode().Erase( iterator.GetCurrentResult()->GetKey(), - &iterator.GetParentNodeEntry()->GetNode(), + iterator.GetCurrentNodeEntry(), found); num_entries_ -= found; From 45f2bf556c696ed0aababa499190f58102c76fd1 Mon Sep 17 00:00:00 2001 From: Tilmann Date: Wed, 9 Mar 2022 18:39:35 +0100 Subject: [PATCH 06/16] Fix/issue 9 entry union (#10) --- phtree/benchmark/insert_d_benchmark.cc | 7 +- phtree/phtree_d_test.cc | 3 + phtree/phtree_test.cc | 20 ++- phtree/v16/entry.h | 170 +++++++++++++++++++++---- 4 files changed, 168 insertions(+), 32 deletions(-) diff --git a/phtree/benchmark/insert_d_benchmark.cc b/phtree/benchmark/insert_d_benchmark.cc index 7ef06a36..7f2f071a 100644 --- a/phtree/benchmark/insert_d_benchmark.cc +++ b/phtree/benchmark/insert_d_benchmark.cc @@ -31,6 +31,7 @@ const double GLOBAL_MAX = 10000; */ template class IndexBenchmark { + using Index = PhTreeD; public: IndexBenchmark(benchmark::State& state, TestGenerator data_type, int num_entities); @@ -39,7 +40,7 @@ class IndexBenchmark { private: void SetupWorld(benchmark::State& state); - void Insert(benchmark::State& state, PhTreeD& tree); + void Insert(benchmark::State& state, Index& tree); const TestGenerator data_type_; const int num_entities_; @@ -58,7 +59,7 @@ template void IndexBenchmark::Benchmark(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - auto* tree = new PhTreeD(); + auto* tree = new Index(); state.ResumeTiming(); Insert(state, *tree); @@ -82,7 +83,7 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { } template -void IndexBenchmark::Insert(benchmark::State& state, PhTreeD& tree) { +void IndexBenchmark::Insert(benchmark::State& state, Index& tree) { for (int i = 0; i < num_entities_; ++i) { PhPointD& p = points_[i]; tree.emplace(p, i); diff --git a/phtree/phtree_d_test.cc b/phtree/phtree_d_test.cc index 6e966906..df078f69 100644 --- a/phtree/phtree_d_test.cc +++ b/phtree/phtree_d_test.cc @@ -48,7 +48,10 @@ struct Id { return _i == rhs._i; } + Id(Id const& rhs) = default; + Id(Id && rhs) = default; Id& operator=(Id const& rhs) = default; + Id& operator=(Id && rhs) = default; int _i; }; diff --git a/phtree/phtree_test.cc b/phtree/phtree_test.cc index 8bf4b423..42b4e78d 100644 --- a/phtree/phtree_test.cc +++ b/phtree/phtree_test.cc @@ -76,6 +76,20 @@ struct Id { _i = other._i; } +// Id& operator=(const Id& other) = default; +// Id& operator=(Id&& other) = default; + + Id& operator=(const Id& other) noexcept { + ++copy_assign_count_; + _i = other._i; + return *this; + } + Id& operator=(Id&& other) noexcept { + ++move_assign_count_; + _i = other._i; + return *this; + } + bool operator==(const Id& rhs) const { ++copy_assign_count_; return _i == rhs._i; @@ -90,8 +104,6 @@ struct Id { ++destruct_count_; } - Id& operator=(Id const& rhs) = default; - int _i; }; @@ -221,7 +233,9 @@ void SmokeTestBasicOps(size_t N) { ASSERT_TRUE(tree.empty()); PhTreeDebugHelper::CheckConsistency(tree); - ASSERT_EQ(construct_count_ + copy_construct_count_ + move_construct_count_, destruct_count_); + // Normal construction and destruction should be symmetric. Move-construction is ignored. + ASSERT_GE(construct_count_ + copy_construct_count_ + move_construct_count_, destruct_count_); + ASSERT_LE(construct_count_ + copy_construct_count_, destruct_count_); // The following assertions exist only as sanity checks and may need adjusting. // There is nothing fundamentally wrong if a change in the implementation violates // any of the following assertions, as long as performance/memory impact is observed. diff --git a/phtree/v16/entry.h b/phtree/v16/entry.h index 1c8610fc..1ee61b5c 100644 --- a/phtree/v16/entry.h +++ b/phtree/v16/entry.h @@ -23,14 +23,19 @@ #include #include +//#define PH_TREE_ENTRY_POSTLEN 1 + namespace improbable::phtree::v16 { template class Node; +template +struct EntryVariant; + /* - * Nodes in the PH-Tree contain up to 2^DIM PhEntries, one in each geometric quadrant. - * PhEntries can contain two types of data: + * Nodes in the PH-Tree contain up to 2^DIM Entries, one in each geometric quadrant. + * Entries can contain two types of data: * - A key/value pair (value of type T) * - A prefix/child-node pair, where prefix is the prefix of the child node and the * child node is contained in a unique_ptr. @@ -41,87 +46,200 @@ class Entry { using ValueT = std::remove_const_t; using NodeT = Node; + enum { + VALUE = 0, + NODE = 1, + EMPTY = 2, + }; + public: /* * Construct entry with existing node. */ - Entry(const KeyT& k, std::unique_ptr&& node_ptr) - : kd_key_{k}, node_{std::move(node_ptr)}, value_{std::nullopt} {} + Entry(const KeyT& k, std::unique_ptr&& node_ptr) noexcept + : kd_key_{k} + , node_{std::move(node_ptr)} + , type{NODE} +#ifdef PH_TREE_ENTRY_POSTLEN + , postfix_len_{node_->GetPostfixLen()} +#endif + { + } /* * Construct entry with a new node. */ - Entry(bit_width_t infix_len, bit_width_t postfix_len) - : kd_key_(), node_{std::make_unique(infix_len, postfix_len)}, value_{std::nullopt} {} + Entry(bit_width_t infix_len, bit_width_t postfix_len) noexcept + : kd_key_() + , node_{std::make_unique(infix_len, postfix_len)} + , type{NODE} +#ifdef PH_TREE_ENTRY_POSTLEN + , postfix_len_{postfix_len} +#endif + { + } /* * Construct entry with existing T. */ - Entry(const KeyT& k, std::optional&& value) - : kd_key_{k}, node_{nullptr}, value_{std::move(value)} {} + Entry(const KeyT& k, std::optional&& value) noexcept + : kd_key_{k} + , value_{std::move(value)} + , type{VALUE} +#ifdef PH_TREE_ENTRY_POSTLEN + , postfix_len_{0} +#endif + { + // value.reset(); // std::optional's move constructor does not destruct the previous + } /* * Construct entry with new T or moved T. */ template - explicit Entry(const KeyT& k, Args&&... args) - : kd_key_{k}, node_{nullptr}, value_{std::in_place, std::forward(args)...} {} + explicit Entry(const KeyT& k, Args&&... args) noexcept + : kd_key_{k} + , value_{std::in_place, std::forward(args)...} + , type{VALUE} +#ifdef PH_TREE_ENTRY_POSTLEN + , postfix_len_{0} +#endif + { + } + + Entry(const Entry& other) = delete; + Entry& operator=(const Entry& other) = delete; + + Entry(Entry&& other) noexcept : kd_key_{std::move(other.kd_key_)}, type{std::move(other.type)} { +#ifdef PH_TREE_ENTRY_POSTLEN + postfix_len_ = std::move(other.postfix_len_); +#endif + AssignUnion(std::move(other)); + } + + Entry& operator=(Entry&& other) noexcept { + kd_key_ = std::move(other.kd_key_); +#ifdef PH_TREE_ENTRY_POSTLEN + postfix_len_ = std::move(other.postfix_len_); +#endif + DestroyUnion(); + AssignUnion(std::move(other)); + return *this; + } + + ~Entry() noexcept { + DestroyUnion(); + } [[nodiscard]] const KeyT& GetKey() const { return kd_key_; } [[nodiscard]] bool IsValue() const { - return value_.has_value(); + return type == VALUE; } [[nodiscard]] bool IsNode() const { - return node_.get() != nullptr; + return type == NODE; } [[nodiscard]] T& GetValue() const { - assert(IsValue()); + assert(type == VALUE); return const_cast(*value_); } [[nodiscard]] NodeT& GetNode() const { - assert(IsNode()); + assert(type == NODE); return *node_; } - void SetNode(std::unique_ptr&& node) { - assert(!IsNode()); - node_ = std::move(node); - value_.reset(); + void SetNode(std::unique_ptr&& node) noexcept { +#ifdef PH_TREE_ENTRY_POSTLEN + postfix_len_ = node->GetPostfixLen(); +#endif + // std::cout << "size EV : " << sizeof(kd_key_) << " + " << sizeof(node_) << " + " + // << sizeof(value_) << "+" << sizeof(type) << " = " << sizeof(*this) << + // std::endl; + DestroyUnion(); + type = NODE; + new (&node_) std::unique_ptr{std::move(node)}; + assert(!node); + } + + [[nodiscard]] bit_width_t GetNodePostfixLen() const { + assert(IsNode()); +#ifdef PH_TREE_ENTRY_POSTLEN + return postfix_len_; +#else + return GetNode().GetPostfixLen(); +#endif } [[nodiscard]] std::optional&& ExtractValue() { assert(IsValue()); + type = EMPTY; return std::move(value_); } [[nodiscard]] std::unique_ptr&& ExtractNode() { assert(IsNode()); + type = EMPTY; return std::move(node_); } void ReplaceNodeWithDataFromEntry(Entry&& other) { assert(IsNode()); - kd_key_ = other.GetKey(); + // 'other' may be referenced from the local node, so we need to do move(other) + // before destructing the local node. + auto node = std::move(node_); + type = EMPTY; + *this = std::move(other); + node.~unique_ptr(); +#ifdef PH_TREE_ENTRY_POSTLEN + postfix_len_ = std::move(other.postfix_len_); +#endif + } - if (other.IsNode()) { - node_ = std::move(other.node_); + private: + void AssignUnion(Entry&& other) noexcept { + type = std::move(other.type); + if (type == NODE) { + new (&node_) std::unique_ptr{std::move(other.node_)}; + } else if (type == VALUE) { + new (&value_) std::optional{std::move(other.value_)}; } else { - value_ = std::move(other.value_); - node_.reset(); + assert(false && "Assigning from an EMPTY variant is a waste of time."); } } - private: + void DestroyUnion() noexcept { + if (type == VALUE) { + value_.~optional(); + } else if (type == NODE) { + node_.~unique_ptr(); + } else { + assert(EMPTY); + } + type = EMPTY; + } + KeyT kd_key_; - std::unique_ptr node_; - std::optional value_; + union { + std::unique_ptr node_; + std::optional value_; + }; + alignas(2) std::uint16_t type; + // The length (number of bits) of post fixes (the part of the coordinate that is 'below' the + // current node). If a variable prefix_len would refer to the number of bits in this node's + // prefix, and if we assume 64 bit values, the following would always hold: + // prefix_len + 1 + postfix_len = 64. + // The '+1' accounts for the 1 bit that is represented by the local node's hypercube, + // i.e. the same bit that is used to create the lookup keys in entries_. +#ifdef PH_TREE_ENTRY_POSTLEN + alignas(2) bit_width_t postfix_len_; +#endif }; + } // namespace improbable::phtree::v16 #endif // PHTREE_V16_ENTRY_H From 5d76f97e63b3494cbdab1a3e3f5986232f76e6dd Mon Sep 17 00:00:00 2001 From: Tilmann Date: Wed, 9 Mar 2022 18:53:08 +0100 Subject: [PATCH 07/16] Move postfix tfrom Node to Entry (#12) --- CHANGELOG.md | 3 + phtree/v16/debug_helper_v16.h | 60 +++++++------- phtree/v16/entry.h | 112 ++++++++++---------------- phtree/v16/for_each.h | 21 ++--- phtree/v16/for_each_hc.h | 46 ++++------- phtree/v16/iterator_base.h | 2 +- phtree/v16/iterator_hc.h | 26 +++--- phtree/v16/iterator_knn_hs.h | 3 +- phtree/v16/node.h | 146 ++++++++++------------------------ phtree/v16/phtree_v16.h | 38 ++++----- 10 files changed, 178 insertions(+), 279 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d396d912..5a286281 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Changed +- postfix/infix field moved from Node to Entry. This avoids indirections and improves performance of most by ~10%. + operations by 5-15%. [#11](https://github.com/tzaeschke/phtree-cpp/issues/11) +- Entries now use 'union' to store children. [#9](https://github.com/tzaeschke/phtree-cpp/issues/9) - Avoid unnecessary find() when removing a node. [#5](https://github.com/tzaeschke/phtree-cpp/issues/5) - Avoid unnecessary key copy when inserting a node. [#4](https://github.com/tzaeschke/phtree-cpp/issues/4) - for_each(callback, filter) was traversing too many nodes. [#2](https://github.com/tzaeschke/phtree-cpp/issues/2) diff --git a/phtree/v16/debug_helper_v16.h b/phtree/v16/debug_helper_v16.h index 85ef92d9..9cfb07fe 100644 --- a/phtree/v16/debug_helper_v16.h +++ b/phtree/v16/debug_helper_v16.h @@ -30,11 +30,10 @@ class PhTreeV16; template class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { - using KeyT = PhPoint; - using NodeT = Node; + using EntryT = Entry; public: - DebugHelperV16(const NodeT& root, size_t size) : root_{root}, size_{size} {} + DebugHelperV16(const EntryT& root, size_t size) : root_{root}, size_{size} {} /* * Depending on the detail parameter this returns: @@ -57,7 +56,7 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { ToStringPlain(os, root_); break; case Enum::tree: - ToStringTree(os, 0, root_, KeyT{}, true); + ToStringTree(os, 0, root_, MAX_BIT_WIDTH, true); break; } return os.str(); @@ -70,7 +69,7 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { */ [[nodiscard]] PhTreeStats GetStats() const override { PhTreeStats stats; - root_.GetStats(stats); + root_.GetNode().GetStats(stats, root_); return stats; } @@ -78,19 +77,19 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { * Checks the consistency of the tree. This function requires assertions to be enabled. */ void CheckConsistency() const override { - assert(size_ == root_.CheckConsistency()); + assert(size_ == root_.GetNode().CheckConsistency(root_)); } private: - void ToStringPlain(std::ostringstream& os, const NodeT& node) const { - for (auto& it : node.Entries()) { - const auto& o = it.second; + void ToStringPlain(std::ostringstream& os, const EntryT& entry) const { + for (auto& it : entry.GetNode().Entries()) { + const auto& child = it.second; // inner node? - if (o.IsNode()) { - ToStringPlain(os, o.GetNode()); + if (child.IsNode()) { + ToStringPlain(os, child); } else { - os << o.GetKey(); - os << " v=" << (o.IsValue() ? "T" : "null") << std::endl; + os << child.GetKey(); + os << " v=" << (child.IsValue() ? "T" : "null") << std::endl; } } } @@ -98,50 +97,53 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { void ToStringTree( std::ostringstream& sb, bit_width_t current_depth, - const NodeT& node, - const KeyT& prefix, + const EntryT& entry, + const bit_width_t parent_postfix_len, bool printValue) const { std::string ind = "*"; for (bit_width_t i = 0; i < current_depth; ++i) { ind += "-"; } - sb << ind << "il=" << node.GetInfixLen() << " pl=" << node.GetPostfixLen() - << " ec=" << node.GetEntryCount() << " inf=["; + const auto& node = entry.GetNode(); + const auto infix_len = entry.GetNodeInfixLen(parent_postfix_len); + const auto postfix_len = entry.GetNodePostfixLen(); + sb << ind << "il=" << infix_len << " pl=" << postfix_len << " ec=" << node.GetEntryCount() + << " inf=["; // for a leaf node, the existence of a sub just indicates that the value exists. - if (node.GetInfixLen() > 0) { - bit_mask_t mask = MAX_MASK << node.GetInfixLen(); + if (infix_len > 0) { + bit_mask_t mask = MAX_MASK << infix_len; mask = ~mask; - mask <<= node.GetPostfixLen() + 1; + mask <<= postfix_len + 1; for (dimension_t i = 0; i < DIM; ++i) { - sb << ToBinary(prefix[i] & mask) << ","; + sb << ToBinary(entry.GetKey()[i] & mask) << ","; } } - current_depth += node.GetInfixLen(); + current_depth += infix_len; sb << "] " - << "Node___il=" << node.GetInfixLen() << ";pl=" << node.GetPostfixLen() + << "Node___il=" << infix_len << ";pl=" << postfix_len << ";size=" << node.Entries().size() << std::endl; // To clean previous postfixes. for (auto& it : node.Entries()) { - const auto& o = it.second; + const auto& child = it.second; hc_pos_t hcPos = it.first; - if (o.IsNode()) { + if (child.IsNode()) { sb << ind << "# " << hcPos << " Node: " << std::endl; - ToStringTree(sb, current_depth + 1, o.GetNode(), o.GetKey(), printValue); + ToStringTree(sb, current_depth + 1, child, postfix_len, printValue); } else { // post-fix - sb << ind << ToBinary(o.GetKey()); + sb << ind << ToBinary(child.GetKey()); sb << " hcPos=" << hcPos; if (printValue) { - sb << " v=" << (o.IsValue() ? "T" : "null"); + sb << " v=" << (child.IsValue() ? "T" : "null"); } sb << std::endl; } } } - const NodeT& root_; + const EntryT& root_; const size_t size_; }; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/entry.h b/phtree/v16/entry.h index 1ee61b5c..838a9f86 100644 --- a/phtree/v16/entry.h +++ b/phtree/v16/entry.h @@ -20,6 +20,7 @@ #include "../../phtree/common/common.h" #include "node.h" #include +#include #include #include @@ -56,40 +57,20 @@ class Entry { /* * Construct entry with existing node. */ - Entry(const KeyT& k, std::unique_ptr&& node_ptr) noexcept - : kd_key_{k} - , node_{std::move(node_ptr)} - , type{NODE} -#ifdef PH_TREE_ENTRY_POSTLEN - , postfix_len_{node_->GetPostfixLen()} -#endif - { - } + Entry(const KeyT& k, std::unique_ptr&& node_ptr, bit_width_t postfix_len) noexcept + : kd_key_{k}, node_{std::move(node_ptr)}, union_type_{NODE}, postfix_len_{postfix_len} {} /* * Construct entry with a new node. */ - Entry(bit_width_t infix_len, bit_width_t postfix_len) noexcept - : kd_key_() - , node_{std::make_unique(infix_len, postfix_len)} - , type{NODE} -#ifdef PH_TREE_ENTRY_POSTLEN - , postfix_len_{postfix_len} -#endif - { - } + Entry(bit_width_t postfix_len) noexcept + : kd_key_(), node_{std::make_unique()}, union_type_{NODE}, postfix_len_{postfix_len} {} /* * Construct entry with existing T. */ Entry(const KeyT& k, std::optional&& value) noexcept - : kd_key_{k} - , value_{std::move(value)} - , type{VALUE} -#ifdef PH_TREE_ENTRY_POSTLEN - , postfix_len_{0} -#endif - { + : kd_key_{k}, value_{std::move(value)}, union_type_{VALUE}, postfix_len_{0} { // value.reset(); // std::optional's move constructor does not destruct the previous } @@ -100,28 +81,21 @@ class Entry { explicit Entry(const KeyT& k, Args&&... args) noexcept : kd_key_{k} , value_{std::in_place, std::forward(args)...} - , type{VALUE} -#ifdef PH_TREE_ENTRY_POSTLEN - , postfix_len_{0} -#endif - { - } + , union_type_{VALUE} + , postfix_len_{0} {} Entry(const Entry& other) = delete; Entry& operator=(const Entry& other) = delete; - Entry(Entry&& other) noexcept : kd_key_{std::move(other.kd_key_)}, type{std::move(other.type)} { -#ifdef PH_TREE_ENTRY_POSTLEN + Entry(Entry&& other) noexcept + : kd_key_{std::move(other.kd_key_)}, union_type_{std::move(other.union_type_)} { postfix_len_ = std::move(other.postfix_len_); -#endif AssignUnion(std::move(other)); } Entry& operator=(Entry&& other) noexcept { kd_key_ = std::move(other.kd_key_); -#ifdef PH_TREE_ENTRY_POSTLEN postfix_len_ = std::move(other.postfix_len_); -#endif DestroyUnion(); AssignUnion(std::move(other)); return *this; @@ -136,54 +110,55 @@ class Entry { } [[nodiscard]] bool IsValue() const { - return type == VALUE; + return union_type_ == VALUE; } [[nodiscard]] bool IsNode() const { - return type == NODE; + return union_type_ == NODE; } [[nodiscard]] T& GetValue() const { - assert(type == VALUE); + assert(union_type_ == VALUE); return const_cast(*value_); } [[nodiscard]] NodeT& GetNode() const { - assert(type == NODE); + assert(union_type_ == NODE); return *node_; } - void SetNode(std::unique_ptr&& node) noexcept { -#ifdef PH_TREE_ENTRY_POSTLEN - postfix_len_ = node->GetPostfixLen(); -#endif - // std::cout << "size EV : " << sizeof(kd_key_) << " + " << sizeof(node_) << " + " - // << sizeof(value_) << "+" << sizeof(type) << " = " << sizeof(*this) << - // std::endl; + void SetNode(std::unique_ptr&& node, bit_width_t postfix_len) noexcept { + postfix_len_ = postfix_len; DestroyUnion(); - type = NODE; + union_type_ = NODE; new (&node_) std::unique_ptr{std::move(node)}; assert(!node); } - [[nodiscard]] bit_width_t GetNodePostfixLen() const { + [[nodiscard]] bit_width_t GetNodePostfixLen() const noexcept { assert(IsNode()); -#ifdef PH_TREE_ENTRY_POSTLEN return postfix_len_; -#else - return GetNode().GetPostfixLen(); -#endif } - [[nodiscard]] std::optional&& ExtractValue() { + [[nodiscard]] bit_width_t GetNodeInfixLen(bit_width_t parent_postfix_len) const noexcept { + assert(IsNode()); + return parent_postfix_len - GetNodePostfixLen() - 1; + } + + [[nodiscard]] bool HasNodeInfix(bit_width_t parent_postfix_len) const noexcept { + assert(IsNode()); + return parent_postfix_len - GetNodePostfixLen() - 1 > 0; + } + + [[nodiscard]] std::optional&& ExtractValue() noexcept { assert(IsValue()); - type = EMPTY; + union_type_ = EMPTY; return std::move(value_); } - [[nodiscard]] std::unique_ptr&& ExtractNode() { + [[nodiscard]] std::unique_ptr&& ExtractNode() noexcept { assert(IsNode()); - type = EMPTY; + union_type_ = EMPTY; return std::move(node_); } @@ -192,20 +167,17 @@ class Entry { // 'other' may be referenced from the local node, so we need to do move(other) // before destructing the local node. auto node = std::move(node_); - type = EMPTY; + union_type_ = EMPTY; *this = std::move(other); node.~unique_ptr(); -#ifdef PH_TREE_ENTRY_POSTLEN - postfix_len_ = std::move(other.postfix_len_); -#endif } private: void AssignUnion(Entry&& other) noexcept { - type = std::move(other.type); - if (type == NODE) { + union_type_ = std::move(other.union_type_); + if (union_type_ == NODE) { new (&node_) std::unique_ptr{std::move(other.node_)}; - } else if (type == VALUE) { + } else if (union_type_ == VALUE) { new (&value_) std::optional{std::move(other.value_)}; } else { assert(false && "Assigning from an EMPTY variant is a waste of time."); @@ -213,14 +185,14 @@ class Entry { } void DestroyUnion() noexcept { - if (type == VALUE) { + if (union_type_ == VALUE) { value_.~optional(); - } else if (type == NODE) { + } else if (union_type_ == NODE) { node_.~unique_ptr(); } else { - assert(EMPTY); + assert(union_type_ == EMPTY); } - type = EMPTY; + union_type_ = EMPTY; } KeyT kd_key_; @@ -228,16 +200,14 @@ class Entry { std::unique_ptr node_; std::optional value_; }; - alignas(2) std::uint16_t type; + alignas(2) std::uint16_t union_type_; // The length (number of bits) of post fixes (the part of the coordinate that is 'below' the // current node). If a variable prefix_len would refer to the number of bits in this node's // prefix, and if we assume 64 bit values, the following would always hold: // prefix_len + 1 + postfix_len = 64. // The '+1' accounts for the 1 bit that is represented by the local node's hypercube, // i.e. the same bit that is used to create the lookup keys in entries_. -#ifdef PH_TREE_ENTRY_POSTLEN alignas(2) bit_width_t postfix_len_; -#endif }; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/for_each.h b/phtree/v16/for_each.h index d624f099..2531f70e 100644 --- a/phtree/v16/for_each.h +++ b/phtree/v16/for_each.h @@ -29,32 +29,25 @@ namespace improbable::phtree::v16 { template class ForEach { static constexpr dimension_t DIM = CONVERT::DimInternal; - using KeyExternal = typename CONVERT::KeyExternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; using EntryT = Entry; - using NodeT = Node; public: ForEach(const CONVERT& converter, CALLBACK_FN& callback, FILTER filter) : converter_{converter}, callback_{callback}, filter_(std::move(filter)) {} - void run(const EntryT& root) { - assert(root.IsNode()); - TraverseNode(root.GetNode()); - } - - private: - void TraverseNode(const NodeT& node) { - auto iter = node.Entries().begin(); - auto end = node.Entries().end(); + void Traverse(const EntryT& entry) { + assert(entry.IsNode()); + auto& entries = entry.GetNode().Entries(); + auto iter = entries.begin(); + auto end = entries.end(); for (; iter != end; ++iter) { const auto& child = iter->second; const auto& child_key = child.GetKey(); if (child.IsNode()) { - const auto& child_node = child.GetNode(); - if (filter_.IsNodeValid(child_key, child_node.GetPostfixLen() + 1)) { - TraverseNode(child_node); + if (filter_.IsNodeValid(child_key, child.GetNodePostfixLen() + 1)) { + Traverse(child); } } else { T& value = child.GetValue(); diff --git a/phtree/v16/for_each_hc.h b/phtree/v16/for_each_hc.h index d870debc..46556f1e 100644 --- a/phtree/v16/for_each_hc.h +++ b/phtree/v16/for_each_hc.h @@ -36,11 +36,9 @@ namespace improbable::phtree::v16 { template class ForEachHC { static constexpr dimension_t DIM = CONVERT::DimInternal; - using KeyExternal = typename CONVERT::KeyExternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; using EntryT = Entry; - using NodeT = Node; public: ForEachHC( @@ -55,18 +53,15 @@ class ForEachHC { , callback_{callback} , filter_(std::move(filter)) {} - void run(const EntryT& root) { - assert(root.IsNode()); - TraverseNode(root.GetKey(), root.GetNode()); - } - - private: - void TraverseNode(const KeyInternal& key, const NodeT& node) { + void Traverse(const EntryT& entry) { + assert(entry.IsNode()); hc_pos_t mask_lower = 0; hc_pos_t mask_upper = 0; - CalcLimits(node.GetPostfixLen(), key, mask_lower, mask_upper); - auto iter = node.Entries().lower_bound(mask_lower); - auto end = node.Entries().end(); + CalcLimits(entry.GetNodePostfixLen(), entry.GetKey(), mask_lower, mask_upper); + auto& entries = entry.GetNode().Entries(); + auto postfix_len = entry.GetNodePostfixLen(); + auto iter = entries.lower_bound(mask_lower); + auto end = entries.end(); for (; iter != end && iter->first <= mask_upper; ++iter) { auto child_hc_pos = iter->first; // Use bit-mask magic to check whether we are in a valid quadrant. @@ -75,14 +70,13 @@ class ForEachHC { const auto& child = iter->second; const auto& child_key = child.GetKey(); if (child.IsNode()) { - const auto& child_node = child.GetNode(); - if (CheckNode(child_key, child_node)) { - TraverseNode(child_key, child_node); + if (CheckNode(child, postfix_len)) { + Traverse(child); } } else { T& value = child.GetValue(); if (IsInRange(child_key, range_min_, range_max_) && - ApplyFilter(child_key, value)) { + filter_.IsEntryValid(child_key, value)) { callback_(converter_.post(child_key), value); } } @@ -90,14 +84,16 @@ class ForEachHC { } } - bool CheckNode(const KeyInternal& key, const NodeT& node) const { + bool CheckNode(const EntryT& entry, bit_width_t parent_postfix_len) const { + const KeyInternal& key = entry.GetKey(); // Check if the node overlaps with the query box. // An infix with len=0 implies that at least part of the child node overlaps with the query, // otherwise the bit mask checking would have returned 'false'. - if (node.GetInfixLen() > 0) { + // Putting it differently, if the infix has len=0, then there is no point in validating it. + if (entry.HasNodeInfix(parent_postfix_len)) { // Mask for comparing the prefix with the query boundaries. - assert(node.GetPostfixLen() + 1 < MAX_BIT_WIDTH); - SCALAR comparison_mask = MAX_MASK << (node.GetPostfixLen() + 1); + assert(entry.GetNodePostfixLen() + 1 < MAX_BIT_WIDTH); + SCALAR comparison_mask = MAX_MASK << (entry.GetNodePostfixLen() + 1); for (dimension_t dim = 0; dim < DIM; ++dim) { SCALAR prefix = key[dim] & comparison_mask; if (prefix > range_max_[dim] || prefix < (range_min_[dim] & comparison_mask)) { @@ -105,15 +101,7 @@ class ForEachHC { } } } - return ApplyFilter(key, node); - } - - [[nodiscard]] bool ApplyFilter(const KeyInternal& key, const NodeT& node) const { - return filter_.IsNodeValid(key, node.GetPostfixLen() + 1); - } - - [[nodiscard]] bool ApplyFilter(const KeyInternal& key, const T& value) const { - return filter_.IsEntryValid(key, value); + return filter_.IsNodeValid(key, entry.GetNodePostfixLen() + 1); } void CalcLimits( diff --git a/phtree/v16/iterator_base.h b/phtree/v16/iterator_base.h index b8c68486..8fcd6eea 100644 --- a/phtree/v16/iterator_base.h +++ b/phtree/v16/iterator_base.h @@ -108,7 +108,7 @@ class IteratorBase { [[nodiscard]] bool ApplyFilter(const EntryT& entry) const { return entry.IsNode() - ? filter_.IsNodeValid(entry.GetKey(), entry.GetNode().GetPostfixLen() + 1) + ? filter_.IsNodeValid(entry.GetKey(), entry.GetNodePostfixLen() + 1) : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); } diff --git a/phtree/v16/iterator_hc.h b/phtree/v16/iterator_hc.h index 2485550c..b61c550b 100644 --- a/phtree/v16/iterator_hc.h +++ b/phtree/v16/iterator_hc.h @@ -100,7 +100,7 @@ class IteratorHC : public IteratorBase { auto& PrepareAndPush(const EntryT& entry) { assert(stack_size_ < stack_.size() - 1); auto& ni = stack_[stack_size_++]; - ni.init(range_min_, range_max_, entry.GetNode(), entry.GetKey()); + ni.Init(range_min_, range_max_, entry); return ni; } @@ -132,12 +132,14 @@ class NodeIterator { using NodeT = Node; public: - NodeIterator() : iter_{}, node_{nullptr}, mask_lower_{0}, mask_upper_(0) {} + NodeIterator() : iter_{}, mask_lower_{0}, mask_upper_{0}, postfix_len_{0} {} - void init(const KeyT& range_min, const KeyT& range_max, const NodeT& node, const KeyT& prefix) { - node_ = &node; - CalcLimits(node.GetPostfixLen(), range_min, range_max, prefix); + void Init(const KeyT& range_min, const KeyT& range_max, const EntryT& entry) { + auto& node = entry.GetNode(); + CalcLimits(entry.GetNodePostfixLen(), range_min, range_max, entry.GetKey()); iter_ = node.Entries().lower_bound(mask_lower_); + node_ = &node; + postfix_len_ = entry.GetNodePostfixLen(); } /* @@ -163,16 +165,16 @@ class NodeIterator { return IsInRange(candidate.GetKey(), range_min, range_max); } - auto& node = candidate.GetNode(); // Check if node-prefix allows sub-node to contain any useful values. // An infix with len=0 implies that at least part of the child node overlaps with the query. - if (node.GetInfixLen() == 0) { + // Putting it differently, if the infix has len=0, then there is no point in validating it. + if (!candidate.HasNodeInfix(postfix_len_)) { return true; } // Mask for comparing the prefix with the query boundaries. - assert(node.GetPostfixLen() + 1 < MAX_BIT_WIDTH); - SCALAR comparison_mask = MAX_MASK << (node.GetPostfixLen() + 1); + assert(candidate.GetNodePostfixLen() + 1 < MAX_BIT_WIDTH); + SCALAR comparison_mask = MAX_MASK << (candidate.GetNodePostfixLen() + 1); auto& key = candidate.GetKey(); for (dimension_t dim = 0; dim < DIM; ++dim) { SCALAR in = key[dim] & comparison_mask; @@ -250,13 +252,17 @@ class NodeIterator { } mask_lower_ = lower_limit; mask_upper_ = upper_limit; +// std::cout << "size IT : " << sizeof(iter_) << " + " << sizeof(node_) << " + " +// << sizeof(mask_lower_) << " + " << sizeof(mask_lower_) << " + " +// << sizeof(postfix_len_) << " = " << sizeof(*this) << std::endl; } private: EntryIteratorC iter_; - const NodeT* node_; + NodeT* node_; hc_pos_t mask_lower_; hc_pos_t mask_upper_; + bit_width_t postfix_len_; }; } // namespace } // namespace improbable::phtree::v16 diff --git a/phtree/v16/iterator_knn_hs.h b/phtree/v16/iterator_knn_hs.h index 3c30f7d6..2dc7aab0 100644 --- a/phtree/v16/iterator_knn_hs.h +++ b/phtree/v16/iterator_knn_hs.h @@ -114,8 +114,7 @@ class IteratorKnnHS : public IteratorBase { auto& e2 = entry.second; if (this->ApplyFilter(e2)) { if (e2.IsNode()) { - auto& sub = e2.GetNode(); - double d = DistanceToNode(e2.GetKey(), sub.GetPostfixLen() + 1); + double d = DistanceToNode(e2.GetKey(), e2.GetNodePostfixLen() + 1); queue_.emplace(d, &e2); } else { double d = distance_(center_post_, this->post(e2.GetKey())); diff --git a/phtree/v16/node.h b/phtree/v16/node.h index 24be9849..36c04f90 100644 --- a/phtree/v16/node.h +++ b/phtree/v16/node.h @@ -47,44 +47,6 @@ using EntryIterator = decltype(EntryMap().begin()); template using EntryIteratorC = decltype(EntryMap().cbegin()); -namespace { - -/* - * Takes a construct of parent_node -> child_node, ie the child_node is owned by parent_node. - * This function also assumes that the child_node contains only one entry. - * - * This function takes the remaining entry from the child node and inserts it into the parent_node - * where it replaces (and implicitly deletes) the child_node. - * @param prefix_of_child_in_parent This specifies the position of child_node inside the - * parent_node. We only need the relevant bits at the level of the parent_node. This means we can - * use any key of any node or entry that is, or used to be) inside the child_node, because they all - * share the same prefix. This includes the key of the child_node itself. - * @param child_node The node to be removed from the parent node. - * @param parent_node Current owner of the child node. - */ -template -void MergeIntoParent(Node& child_node, Entry& parent_entry) { - assert(child_node.GetEntryCount() == 1); - assert(&parent_entry.GetNode() == &child_node); - // At this point we have found an entry that needs to be removed. We also know that we need to - // remove the child node because it contains at most one other entry and it is not the root - // node. - auto map_entry = child_node.Entries().begin(); - auto& entry = map_entry->second; - - if (entry.IsNode()) { - // connect sub to parent - auto& sub2 = entry.GetNode(); - bit_width_t new_infix_len = child_node.GetInfixLen() + 1 + sub2.GetInfixLen(); - sub2.SetInfixLen(new_infix_len); - } - - // Now move the single entry into the parent, the position in the parent is the same as the - // child_node. - parent_entry.ReplaceNodeWithDataFromEntry(std::move(entry)); -} -} // namespace - /* * A node of the PH-Tree. It contains up to 2^DIM entries, each entry being either a leaf with data * of type T or a child node (both are of the variant type Entry). @@ -110,11 +72,7 @@ class Node { using EntryT = Entry; public: - Node(bit_width_t infix_len, bit_width_t postfix_len) - : postfix_len_(postfix_len), infix_len_(infix_len), entries_{} { - assert(infix_len_ < MAX_BIT_WIDTH); - assert(infix_len >= 0); - } + Node() : entries_{} {} // Nodes should never be copied! Node(const Node&) = delete; @@ -126,14 +84,6 @@ class Node { return entries_.size(); } - [[nodiscard]] bit_width_t GetInfixLen() const { - return infix_len_; - } - - [[nodiscard]] bit_width_t GetPostfixLen() const { - return postfix_len_; - } - /* * Attempts to emplace an entry in this node. * The behavior is analogous to std::map::emplace(), i.e. if there is already a value with the @@ -162,8 +112,8 @@ class Node { * @param args Constructor arguments for creating a value T that can be inserted for the key. */ template - EntryT& Emplace(bool& is_inserted, const KeyT& key, Args&&... args) { - hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + EntryT& Emplace(bool& is_inserted, const KeyT& key, bit_width_t postfix_len, Args&&... args) { + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); auto emplace_result = entries_.try_emplace(hc_pos, key, std::forward(args)...); auto& entry = emplace_result.first->second; // Return if emplace succeed, i.e. there was no entry. @@ -171,20 +121,20 @@ class Node { is_inserted = true; return entry; } - return HandleCollision(entry, is_inserted, key, std::forward(args)...); + return HandleCollision(entry, is_inserted, key, postfix_len, std::forward(args)...); } /* * Returns the value (T or Node) if the entry exists and matches the key. Child nodes are * _not_ traversed. * @param key The key of the entry - * @param parent parent node + * @param parent The parent node * @return The sub node or null. */ - const EntryT* Find(const KeyT& key) const { - hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + const EntryT* Find(const KeyT& key, bit_width_t postfix_len) const { + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); const auto& entry = entries_.find(hc_pos); - if (entry != entries_.end() && DoesEntryMatch(entry->second, key)) { + if (entry != entries_.end() && DoesEntryMatch(entry->second, key, postfix_len)) { return &entry->second; } return nullptr; @@ -200,10 +150,10 @@ class Node { * @param found This is and output parameter and will be set to 'true' if a value was removed. * @return A child node if the provided key leads to a child node. */ - EntryT* Erase(const KeyT& key, EntryT* parent_entry, bool& found) { - hc_pos_t hc_pos = CalcPosInArray(key, GetPostfixLen()); + EntryT* Erase(const KeyT& key, EntryT* parent_entry, bit_width_t postfix_len, bool& found) { + hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); auto it = entries_.find(hc_pos); - if (it != entries_.end() && DoesEntryMatch(it->second, key)) { + if (it != entries_.end() && DoesEntryMatch(it->second, key, postfix_len)) { if (it->second.IsNode()) { return &it->second; } @@ -211,7 +161,9 @@ class Node { found = true; if (parent_entry != nullptr && GetEntryCount() == 1) { - MergeIntoParent(*this, *parent_entry); + // We take the remaining entry from the current node and inserts it into the + // parent_entry where it replaces (and implicitly deletes) the current node. + parent_entry->ReplaceNodeWithDataFromEntry(std::move(entries_.begin()->second)); // WARNING: (this) is deleted here, do not refer to it beyond this point. } } @@ -226,23 +178,23 @@ class Node { return entries_; } - void GetStats(PhTreeStats& stats, bit_width_t current_depth = 0) const { + void GetStats( + PhTreeStats& stats, const EntryT& current_entry, bit_width_t current_depth = 0) const { size_t num_children = entries_.size(); ++stats.n_nodes_; - ++stats.infix_hist_[GetInfixLen()]; ++stats.node_depth_hist_[current_depth]; ++stats.node_size_log_hist_[32 - CountLeadingZeros(std::uint32_t(num_children))]; stats.n_total_children_ += num_children; - - current_depth += GetInfixLen(); stats.q_total_depth_ += current_depth; for (auto& entry : entries_) { auto& child = entry.second; if (child.IsNode()) { + auto child_infix_len = child.GetNodeInfixLen(current_entry.GetNodePostfixLen()); + ++stats.infix_hist_[child_infix_len]; auto& sub = child.GetNode(); - sub.GetStats(stats, current_depth + 1); + sub.GetStats(stats, child, current_depth + 1 + child_infix_len); } else { ++stats.q_n_post_fix_n_[current_depth]; ++stats.size_; @@ -250,11 +202,9 @@ class Node { } } - size_t CheckConsistency(bit_width_t current_depth = 0) const { + size_t CheckConsistency(const EntryT& current_entry, bit_width_t current_depth = 0) const { // Except for a root node if the tree has <2 entries. assert(entries_.size() >= 2 || current_depth == 0); - - current_depth += GetInfixLen(); size_t num_entries_local = 0; size_t num_entries_children = 0; for (auto& entry : entries_) { @@ -262,8 +212,12 @@ class Node { if (child.IsNode()) { auto& sub = child.GetNode(); // Check node consistency - assert(sub.GetInfixLen() + 1 + sub.GetPostfixLen() == GetPostfixLen()); - num_entries_children += sub.CheckConsistency(current_depth + 1); + auto sub_infix_len = child.GetNodeInfixLen(current_entry.GetNodePostfixLen()); + assert( + sub_infix_len + 1 + child.GetNodePostfixLen() == + current_entry.GetNodePostfixLen()); + num_entries_children += + sub.CheckConsistency(child, current_depth + 1 + sub_infix_len); } else { ++num_entries_local; } @@ -271,12 +225,6 @@ class Node { return num_entries_local + num_entries_children; } - void SetInfixLen(bit_width_t newInfLen) { - assert(newInfLen < MAX_BIT_WIDTH); - assert(newInfLen >= 0); - infix_len_ = newInfLen; - } - private: template auto& WriteValue(hc_pos_t hc_pos, const KeyT& new_key, Args&&... args) { @@ -285,10 +233,8 @@ class Node { void WriteEntry(hc_pos_t hc_pos, EntryT& entry) { if (entry.IsNode()) { - auto& node = entry.GetNode(); - bit_width_t new_subnode_infix_len = postfix_len_ - node.postfix_len_ - 1; - node.SetInfixLen(new_subnode_infix_len); - entries_.try_emplace(hc_pos, entry.GetKey(), entry.ExtractNode()); + auto postfix_len = entry.GetNodePostfixLen(); + entries_.try_emplace(hc_pos, entry.GetKey(), entry.ExtractNode(), postfix_len); } else { entries_.try_emplace(hc_pos, entry.GetKey(), entry.ExtractValue()); } @@ -310,17 +256,20 @@ class Node { */ template auto& HandleCollision( - EntryT& existing_entry, bool& is_inserted, const KeyT& new_key, Args&&... args) { + EntryT& existing_entry, + bool& is_inserted, + const KeyT& new_key, + bit_width_t current_postfix_len, + Args&&... args) { assert(!is_inserted); // We have two entries in the same location (local pos). // Now we need to compare the keys. // If they are identical, we simply return the entry for further traversal. if (existing_entry.IsNode()) { - auto& sub_node = existing_entry.GetNode(); - if (sub_node.GetInfixLen() > 0) { + if (existing_entry.HasNodeInfix(current_postfix_len)) { bit_width_t max_conflicting_bits = NumberOfDivergingBits(new_key, existing_entry.GetKey()); - if (max_conflicting_bits > sub_node.GetPostfixLen() + 1) { + if (max_conflicting_bits > existing_entry.GetNodePostfixLen() + 1) { is_inserted = true; return InsertSplit( existing_entry, new_key, max_conflicting_bits, std::forward(args)...); @@ -346,11 +295,8 @@ class Node { const KeyT& new_key, bit_width_t max_conflicting_bits, Args&&... args) { - - // determine length of infix - bit_width_t new_local_infix_len = GetPostfixLen() - max_conflicting_bits; bit_width_t new_postfix_len = max_conflicting_bits - 1; - auto new_sub_node = std::make_unique(new_local_infix_len, new_postfix_len); + auto new_sub_node = std::make_unique(); hc_pos_t pos_sub_1 = CalcPosInArray(new_key, new_postfix_len); hc_pos_t pos_sub_2 = CalcPosInArray(current_entry.GetKey(), new_postfix_len); @@ -359,7 +305,7 @@ class Node { auto& new_entry = new_sub_node->WriteValue(pos_sub_1, new_key, std::forward(args)...); // Insert new node into local node - current_entry.SetNode(std::move(new_sub_node)); + current_entry.SetNode(std::move(new_sub_node), new_postfix_len); return new_entry; } @@ -371,11 +317,11 @@ class Node { * @return 'true' iff the relevant part of the key matches (prefix for nodes, whole key for * other entries). */ - bool DoesEntryMatch(const EntryT& entry, const KeyT& key) const { + bool DoesEntryMatch( + const EntryT& entry, const KeyT& key, const bit_width_t parent_postfix_len) const { if (entry.IsNode()) { - const auto& sub = entry.GetNode(); - if (sub.GetInfixLen() > 0) { - const bit_mask_t mask = MAX_MASK << (sub.GetPostfixLen() + 1); + if (entry.HasNodeInfix(parent_postfix_len)) { + const bit_mask_t mask = MAX_MASK << (entry.GetNodePostfixLen() + 1); return KeyEquals(entry.GetKey(), key, mask); } return true; @@ -383,16 +329,6 @@ class Node { return entry.GetKey() == key; } - // The length (number of bits) of post fixes (the part of the coordinate that is 'below' the - // current node). If a variable prefix_len would refer to the number of bits in this node's - // prefix, and if we assume 64 bit values, the following would always hold: - // prefix_len + 1 + postfix_len = 64. - // The '+1' accounts for the 1 bit that is represented by the local node's hypercube, - // ie. the same bit that is used to create the lookup keys in entries_. - bit_width_t postfix_len_; - // The number of bits between this node and the parent node. For 64bit keys possible values - // range from 0 to 62. - bit_width_t infix_len_; EntryMap entries_; }; diff --git a/phtree/v16/phtree_v16.h b/phtree/v16/phtree_v16.h index 34c134b3..fb666370 100644 --- a/phtree/v16/phtree_v16.h +++ b/phtree/v16/phtree_v16.h @@ -70,7 +70,7 @@ class PhTreeV16 { PhTreeV16(CONVERT& converter = ConverterNoOp()) : num_entries_{0} - , root_{0, MAX_BIT_WIDTH - 1} + , root_{MAX_BIT_WIDTH - 1} , the_end_{converter} , converter_{converter} {} @@ -93,8 +93,8 @@ class PhTreeV16 { auto* current_entry = &root_; bool is_inserted = false; while (current_entry->IsNode()) { - current_entry = - ¤t_entry->GetNode().Emplace(is_inserted, key, std::forward(args)...); + current_entry = ¤t_entry->GetNode().Emplace( + is_inserted, key, current_entry->GetNodePostfixLen(), std::forward(args)...); } num_entries_ += is_inserted; return {current_entry->GetValue(), is_inserted}; @@ -134,7 +134,7 @@ class PhTreeV16 { auto* parent_entry = iterator.GetParentNodeEntry(); if (NumberOfDivergingBits(key, parent_entry->GetKey()) > - parent_entry->GetNode().GetPostfixLen() + 1) { + parent_entry->GetNodePostfixLen() + 1) { // replace higher up in the tree return emplace(key, std::forward(args)...); } @@ -143,8 +143,8 @@ class PhTreeV16 { auto* current_entry = parent_entry; bool is_inserted = false; while (current_entry->IsNode()) { - current_entry = - ¤t_entry->GetNode().Emplace(is_inserted, key, std::forward(args)...); + current_entry = ¤t_entry->GetNode().Emplace( + is_inserted, key, current_entry->GetNodePostfixLen(), std::forward(args)...); } num_entries_ += is_inserted; return {current_entry->GetValue(), is_inserted}; @@ -179,7 +179,7 @@ class PhTreeV16 { } auto* current_entry = &root_; while (current_entry && current_entry->IsNode()) { - current_entry = current_entry->GetNode().Find(key); + current_entry = current_entry->GetNode().Find(key, current_entry->GetNodePostfixLen()); } return current_entry ? 1 : 0; } @@ -203,7 +203,7 @@ class PhTreeV16 { while (current_entry && current_entry->IsNode()) { parent_node = current_node; current_node = current_entry; - current_entry = current_entry->GetNode().Find(key); + current_entry = current_entry->GetNode().Find(key, current_entry->GetNodePostfixLen()); } return IteratorSimple(current_entry, current_node, parent_node, converter_); @@ -222,7 +222,8 @@ class PhTreeV16 { EntryT* non_root_current_entry = nullptr; bool found = false; while (current_entry) { - auto* child_entry = current_entry->GetNode().Erase(key, non_root_current_entry, found); + auto* child_entry = current_entry->GetNode().Erase( + key, non_root_current_entry, current_entry->GetNodePostfixLen(), found); current_entry = child_entry; non_root_current_entry = child_entry; } @@ -247,17 +248,18 @@ class PhTreeV16 { if (iterator.Finished()) { return 0; } - if (!iterator.GetCurrentNodeEntry() || iterator.GetCurrentNodeEntry() == &root_) { - // There may be no entry because not every iterator sets it. - // Also, do _not_ use the root entry, see erase(key). - // Start searching from the top. - return erase(iterator.GetCurrentResult()->GetKey()); + if (!iterator.GetCurrentNodeEntry() || iterator.GetCurrentNodeEntry() == &root_) { + // There may be no entry because not every iterator sets it. + // Also, do _not_ use the root entry, see erase(key). + // Start searching from the top. + return erase(iterator.GetCurrentResult()->GetKey()); } bool found = false; assert(iterator.GetCurrentNodeEntry() && iterator.GetCurrentNodeEntry()->IsNode()); iterator.GetCurrentNodeEntry()->GetNode().Erase( iterator.GetCurrentResult()->GetKey(), iterator.GetCurrentNodeEntry(), + iterator.GetCurrentNodeEntry()->GetNodePostfixLen(), found); num_entries_ -= found; @@ -277,7 +279,7 @@ class PhTreeV16 { */ template void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { - ForEach(converter_, callback, filter).run(root_); + ForEach(converter_, callback, filter).Traverse(root_); } /* @@ -297,7 +299,7 @@ class PhTreeV16 { FILTER filter = FILTER()) const { ForEachHC( query_box.min(), query_box.max(), converter_, callback, filter) - .run(root_); + .Traverse(root_); } /* @@ -363,7 +365,7 @@ class PhTreeV16 { */ void clear() { num_entries_ = 0; - root_ = EntryT(0, MAX_BIT_WIDTH - 1); + root_ = EntryT(MAX_BIT_WIDTH - 1); } /* @@ -385,7 +387,7 @@ class PhTreeV16 { * This function is only for debugging. */ auto GetDebugHelper() const { - return DebugHelperV16(root_.GetNode(), num_entries_); + return DebugHelperV16(root_, num_entries_); } private: From 80a5ceb2cfd77eb64c457c71930e1cc110134521 Mon Sep 17 00:00:00 2001 From: Tilmann Date: Wed, 9 Mar 2022 19:48:20 +0100 Subject: [PATCH 08/16] issue 11 cleanup (#13) --- phtree/v16/entry.h | 3 --- phtree/v16/iterator_hc.h | 19 ++++++++++--------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/phtree/v16/entry.h b/phtree/v16/entry.h index 838a9f86..0e4b744e 100644 --- a/phtree/v16/entry.h +++ b/phtree/v16/entry.h @@ -20,12 +20,9 @@ #include "../../phtree/common/common.h" #include "node.h" #include -#include #include #include -//#define PH_TREE_ENTRY_POSTLEN 1 - namespace improbable::phtree::v16 { template diff --git a/phtree/v16/iterator_hc.h b/phtree/v16/iterator_hc.h index b61c550b..bf6cccda 100644 --- a/phtree/v16/iterator_hc.h +++ b/phtree/v16/iterator_hc.h @@ -59,6 +59,7 @@ class IteratorHC : public IteratorBase { , stack_size_{0} , range_min_{range_min} , range_max_{range_max} { + stack_.reserve(8); PrepareAndPush(root); FindNextElement(); } @@ -98,7 +99,10 @@ class IteratorHC : public IteratorBase { } auto& PrepareAndPush(const EntryT& entry) { - assert(stack_size_ < stack_.size() - 1); + if (stack_.size() < stack_size_ + 1) { + stack_.emplace_back(); + } + assert(stack_size_ < stack_.size()); auto& ni = stack_[stack_size_++]; ni.Init(range_min_, range_max_, entry); return ni; @@ -118,7 +122,7 @@ class IteratorHC : public IteratorBase { return stack_size_ == 0; } - std::array, MAX_BIT_WIDTH> stack_; + std::vector> stack_; size_t stack_size_; const KeyInternal range_min_; const KeyInternal range_max_; @@ -129,7 +133,7 @@ template class NodeIterator { using KeyT = PhPoint; using EntryT = Entry; - using NodeT = Node; + using EntriesT = EntryMap; public: NodeIterator() : iter_{}, mask_lower_{0}, mask_upper_{0}, postfix_len_{0} {} @@ -138,7 +142,7 @@ class NodeIterator { auto& node = entry.GetNode(); CalcLimits(entry.GetNodePostfixLen(), range_min, range_max, entry.GetKey()); iter_ = node.Entries().lower_bound(mask_lower_); - node_ = &node; + entries_ = &node.Entries(); postfix_len_ = entry.GetNodePostfixLen(); } @@ -147,7 +151,7 @@ class NodeIterator { * @return TRUE iff a matching element was found. */ const EntryT* Increment(const KeyT& range_min, const KeyT& range_max) { - while (iter_ != node_->Entries().end() && iter_->first <= mask_upper_) { + while (iter_ != entries_->end() && iter_->first <= mask_upper_) { if (IsPosValid(iter_->first)) { const auto* be = &iter_->second; if (CheckEntry(*be, range_min, range_max)) { @@ -252,14 +256,11 @@ class NodeIterator { } mask_lower_ = lower_limit; mask_upper_ = upper_limit; -// std::cout << "size IT : " << sizeof(iter_) << " + " << sizeof(node_) << " + " -// << sizeof(mask_lower_) << " + " << sizeof(mask_lower_) << " + " -// << sizeof(postfix_len_) << " = " << sizeof(*this) << std::endl; } private: EntryIteratorC iter_; - NodeT* node_; + EntriesT* entries_; hc_pos_t mask_lower_; hc_pos_t mask_upper_; bit_width_t postfix_len_; From 671f60366be385ca04d9116b052a85c7464be517 Mon Sep 17 00:00:00 2001 From: Tilmann Date: Wed, 9 Mar 2022 19:53:01 +0100 Subject: [PATCH 09/16] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 17ab0a47..0fe87fc9 100644 --- a/README.md +++ b/README.md @@ -129,6 +129,7 @@ tree.estimate_count(query); #### Queries * For-each over all elements: `tree.for_each(callback);` + **Note that `for_each` tends to be 10%-20% faster than using an iterator.** * Iterator over all elements: `auto iterator = tree.begin();` * For-each with box shaped window queries: `tree.for_each(PhBoxD(min, max), callback);` * Iterator for box shaped window queries: `auto q = tree.begin_query(PhBoxD(min, max));` @@ -432,7 +433,7 @@ heavily on the actual dataset, usage patterns, hardware, ... . There are numerous ways to improve performance. The following list gives an overview over the possibilities. -1) **Use `for_each` instead of iterators**. This should improve performance of queries by 5%-10%. +1) **Use `for_each` instead of iterators**. This should improve performance of queries by 10%-20%. 2) **Use `emplace_hint` if possible**. When updating the position of an entry, the naive way is to use `erase()` /`emplace()`. With `emplace_hint`, insertion can avoid navigation to the target node if the insertion coordinate is From 7a6b1d877e1faa38ea914c63e96eb484b9141ab9 Mon Sep 17 00:00:00 2001 From: Tilmann Date: Fri, 1 Apr 2022 13:15:49 +0200 Subject: [PATCH 10/16] Fix/issue 14 b tree (#15) --- CHANGELOG.md | 3 + LICENSE | 1 + TODO.txt | 76 +++ WORKSPACE | 6 +- phtree/benchmark/BUILD | 60 ++ phtree/benchmark/benchmark_util.h | 2 +- phtree/benchmark/hd_erase_d_benchmark.cc | 145 +++++ phtree/benchmark/hd_insert_d_benchmark.cc | 132 +++++ phtree/benchmark/hd_knn_d_benchmark.cc | 151 +++++ phtree/benchmark/hd_query_d_benchmark.cc | 214 +++++++ phtree/common/BUILD | 14 + phtree/common/b_plus_tree_map.h | 654 ++++++++++++++++++++++ phtree/common/b_plus_tree_map_test.cc | 185 ++++++ phtree/common/common.h | 1 + phtree/common/flat_sparse_map.h | 4 +- phtree/phtree_test.cc | 21 +- phtree/v16/node.h | 13 +- 17 files changed, 1666 insertions(+), 16 deletions(-) create mode 100644 TODO.txt create mode 100644 phtree/benchmark/hd_erase_d_benchmark.cc create mode 100644 phtree/benchmark/hd_insert_d_benchmark.cc create mode 100644 phtree/benchmark/hd_knn_d_benchmark.cc create mode 100644 phtree/benchmark/hd_query_d_benchmark.cc create mode 100644 phtree/common/b_plus_tree_map.h create mode 100644 phtree/common/b_plus_tree_map_test.cc diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a286281..2a4fb982 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Changed +- DIM>8 now uses custom b_plus_tree_map instead of std::map. This improves performance for all operations, e.g. + window queries on large datasets are up to 4x faster. Benchmarks results can be found in the issue. + [#14](https://github.com/tzaeschke/phtree-cpp/issues/14) - postfix/infix field moved from Node to Entry. This avoids indirections and improves performance of most by ~10%. operations by 5-15%. [#11](https://github.com/tzaeschke/phtree-cpp/issues/11) - Entries now use 'union' to store children. [#9](https://github.com/tzaeschke/phtree-cpp/issues/9) diff --git a/LICENSE b/LICENSE index e46c5961..13cd100a 100644 --- a/LICENSE +++ b/LICENSE @@ -188,6 +188,7 @@ identification within third-party archives. Copyright 2020 Improbable Worlds Limited + Copyright 2022 Tilmann Zäschke Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/TODO.txt b/TODO.txt new file mode 100644 index 00000000..9bf73e5e --- /dev/null +++ b/TODO.txt @@ -0,0 +1,76 @@ +Fix const-ness +============== +- operator[] should have a const overload +- find() should have a non-const overload +- test: + +TEST(PhTreeTest, SmokeTestConstTree) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree1; + tree1.emplace(p, Id{1}); + tree1.emplace(p, Id{2}); + Id id3{3}; + tree1.insert(p, id3); + Id id4{4}; + tree1.insert(p, id4); + const auto& tree = tree1; + ASSERT_EQ(tree.size(), 1); + ASSERT_EQ(tree.find(p).second()._i, 1); + ASSERT_EQ(tree[p]._i, 1); + + auto q_window = tree.begin_query({p, p}); + ASSERT_EQ(1, q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree1.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_EQ(0, tree1.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + + +b_plus_tree_map - binary search +=============== +Use custom binary search: + + // return BptEntry* ?!?!? + template + [[nodiscard]] auto lower_bound(key_t key, std::vector& data) noexcept { + return std::lower_bound(data.begin(), data.end(), key, [](E& left, const key_t key) { + return left.first < key; + }); + // auto pos = __lower_bound(&*data_leaf_.begin(), &*data_leaf_.end(), key); + // return data_leaf_.begin() + pos; + } + + template + inline auto __lower_bound(const TT* __first, const TT* __last, key_t __val) const noexcept { + const TT* const_first = __first; + auto __len = __last - __first; + + while (__len > 0) { + auto __half = __len >> 1; + const TT* __middle = __first + __half; + if (__middle->first < __val) { + __first = __middle; + ++__first; + __len = __len - __half - 1; + } else + __len = __half; + } + return __first - const_first; + } + diff --git a/WORKSPACE b/WORKSPACE index be61fc70..e22c6961 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -34,9 +34,9 @@ http_archive( http_archive( name = "gbenchmark", - sha256 = "dccbdab796baa1043f04982147e67bb6e118fe610da2c65f88912d73987e700c", - strip_prefix = "benchmark-1.5.2", - url = "https://github.com/google/benchmark/archive/v1.5.2.tar.gz", + sha256 = "6132883bc8c9b0df5375b16ab520fac1a85dc9e4cf5be59480448ece74b278d4", + strip_prefix = "benchmark-1.6.1", + url = "https://github.com/google/benchmark/archive/v1.6.1.tar.gz", ) http_archive( diff --git a/phtree/benchmark/BUILD b/phtree/benchmark/BUILD index 95315788..2503b852 100644 --- a/phtree/benchmark/BUILD +++ b/phtree/benchmark/BUILD @@ -304,3 +304,63 @@ cc_binary( "@spdlog", ], ) + +cc_binary( + name = "hd_insert_d_benchmark", + testonly = True, + srcs = [ + "hd_insert_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_erase_d_benchmark", + testonly = True, + srcs = [ + "hd_erase_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_query_d_benchmark", + testonly = True, + srcs = [ + "hd_query_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + +cc_binary( + name = "hd_knn_d_benchmark", + testonly = True, + srcs = [ + "hd_knn_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + "//phtree", + "//phtree/benchmark", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) diff --git a/phtree/benchmark/benchmark_util.h b/phtree/benchmark/benchmark_util.h index 5af70367..8aef78a7 100644 --- a/phtree/benchmark/benchmark_util.h +++ b/phtree/benchmark/benchmark_util.h @@ -91,7 +91,7 @@ auto CreateDuplicates = }; } // namespace -enum TestGenerator { CUBE, CLUSTER }; +enum TestGenerator { CUBE = 4, CLUSTER = 7 }; template auto CreatePointDataMinMax = [](auto& points, diff --git a/phtree/benchmark/hd_erase_d_benchmark.cc b/phtree/benchmark/hd_erase_d_benchmark.cc new file mode 100644 index 00000000..90fd8072 --- /dev/null +++ b/phtree/benchmark/hd_erase_d_benchmark.cc @@ -0,0 +1,145 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "logging.h" +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const int GLOBAL_MAX = 10000; + +/* + * Benchmark for removing entries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void Insert(benchmark::State& state, PhTreeD& tree); + void Remove(benchmark::State& state, PhTreeD& tree); + + const TestGenerator data_type_; + const int num_entities_; + + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new PhTreeD(); + Insert(state, *tree); + state.ResumeTiming(); + + Remove(state, *tree); + + state.PauseTiming(); + // avoid measuring deallocation + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + + state.counters["total_remove_count"] = benchmark::Counter(0); + state.counters["remove_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + logging::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State&, PhTreeD& tree) { + for (int i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i], i); + } +} + +template +void IndexBenchmark::Remove(benchmark::State& state, PhTreeD& tree) { + int n = 0; + for (int i = 0; i < num_entities_; ++i) { + n += tree.erase(points_[i]); + } + + state.counters["total_remove_count"] += n; + state.counters["remove_rate"] += n; +} + +} // namespace + +template +void PhTree6D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities +BENCHMARK_CAPTURE(PhTree6D, ERASE, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, ERASE, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, ERASE, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/hd_insert_d_benchmark.cc b/phtree/benchmark/hd_insert_d_benchmark.cc new file mode 100644 index 00000000..f2389ae8 --- /dev/null +++ b/phtree/benchmark/hd_insert_d_benchmark.cc @@ -0,0 +1,132 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "logging.h" +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree.h" +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; + +/* + * Benchmark for adding entries to the index. + */ +template +class IndexBenchmark { + using Index = PhTreeD; + + public: + explicit IndexBenchmark(benchmark::State& state); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void Insert(benchmark::State& state, Index& tree); + + const TestGenerator data_type_; + const int num_entities_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + auto* tree = new Index(); + state.ResumeTiming(); + + Insert(state, *tree); + + // we do this top avoid measuring deallocation + state.PauseTiming(); + delete tree; + state.ResumeTiming(); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + + state.counters["total_put_count"] = benchmark::Counter(0); + state.counters["put_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + + logging::info("World setup complete."); +} + +template +void IndexBenchmark::Insert(benchmark::State& state, Index& tree) { + for (int i = 0; i < num_entities_; ++i) { + PhPointD& p = points_[i]; + tree.emplace(p, i); + } + + state.counters["total_put_count"] += num_entities_; + state.counters["put_rate"] += num_entities_; +} + +} // namespace + +template +void PhTree6D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_generator, num_entities +BENCHMARK_CAPTURE(PhTree6D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, INSERT, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/hd_knn_d_benchmark.cc b/phtree/benchmark/hd_knn_d_benchmark.cc new file mode 100644 index 00000000..d1fabd42 --- /dev/null +++ b/phtree/benchmark/hd_knn_d_benchmark.cc @@ -0,0 +1,151 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "logging.h" +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; + +/* + * Benchmark for k-nearest-neighbour queries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, PhPointD& center); + void CreateQuery(PhPointD& center); + + const TestGenerator data_type_; + const int num_entities_; + const double knn_result_size_; + + PhTreeD tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state) +: data_type_{static_cast(state.range(2))} +, num_entities_(state.range(0)) +, knn_result_size_(state.range(1)) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + PhPointD center; + CreateQuery(center); + state.ResumeTiming(); + + QueryWorld(state, center); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (int i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], i); + } + + state.counters["total_query_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + + logging::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, PhPointD& center) { + int n = 0; + for (auto q = tree_.begin_knn_query(knn_result_size_, center, DistanceEuclidean()); + q != tree_.end(); + ++q) { + ++n; + } + + state.counters["total_query_count"] += 1; + state.counters["query_rate"] += 1; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(PhPointD& center) { + for (dimension_t d = 0; d < DIM; ++d) { + center[d] = cube_distribution_(random_engine_) * GLOBAL_MAX; + } +} + +} // namespace + +template +void PhTree6D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +BENCHMARK_CAPTURE(PhTree6D, KNN, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {1, 10}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D, KNN, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {1, 10}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D, KNN, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {1, 10}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/benchmark/hd_query_d_benchmark.cc b/phtree/benchmark/hd_query_d_benchmark.cc new file mode 100644 index 00000000..56959770 --- /dev/null +++ b/phtree/benchmark/hd_query_d_benchmark.cc @@ -0,0 +1,214 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "logging.h" +#include "phtree/benchmark/benchmark_util.h" +#include "phtree/phtree.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +namespace { + +const double GLOBAL_MAX = 10000; + +enum QueryType { MIN_MAX_ITER, MIN_MAX_FOR_EACH }; + +template +using BoxType = PhBoxD; + +template +using PointType = PhPointD; + +template +using TreeType = PhTreeD; + +/* + * Benchmark for window queries. + */ +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, double avg_query_result_size_ = 100); + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, BoxType& query_box); + void CreateQuery(BoxType& query_box); + + const TestGenerator data_type_; + const int num_entities_; + const double avg_query_result_size_; + + constexpr int query_edge_length() { + return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); + }; + + TreeType tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector> points_; +}; + +template +IndexBenchmark::IndexBenchmark( + benchmark::State& state, double avg_query_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, avg_query_result_size_(avg_query_result_size) +, tree_{} +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + BoxType query_box; + CreateQuery(query_box); + state.ResumeTiming(); + + QueryWorld(state, query_box); + } +} + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + for (int i = 0; i < num_entities_; ++i) { + tree_.emplace(points_[i], i); + } + + state.counters["total_result_count"] = benchmark::Counter(0); + state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + + logging::info("World setup complete."); +} + +template +struct Counter { + void operator()(PointType, T&) { + ++n_; + } + + size_t n_ = 0; +}; + +template +size_t Count_MMI(TreeType& tree, BoxType& query_box) { + size_t n = 0; + for (auto q = tree.begin_query(query_box); q != tree.end(); ++q) { + ++n; + } + return n; +} + +template +size_t Count_MMFE(TreeType& tree, BoxType& query_box) { + Counter callback; + tree.for_each(query_box, callback); + return callback.n_; +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, BoxType& query_box) { + int n = 0; + switch (QUERY_TYPE) { + case MIN_MAX_ITER: + n = Count_MMI(tree_, query_box); + break; + case MIN_MAX_FOR_EACH: + n = Count_MMFE(tree_, query_box); + break; + } + + state.counters["total_result_count"] += n; + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(BoxType& query_box) { + int length = query_edge_length(); + // scale to ensure query lies within boundary + double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + for (dimension_t d = 0; d < DIM; ++d) { + auto s = cube_distribution_(random_engine_); + s = s * scale; + query_box.min()[d] = s; + query_box.max()[d] = s + length; + } +} + +} // namespace + +template +void PhTree6D_FE(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6, MIN_MAX_FOR_EACH> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree6D_IT(benchmark::State& state, Arguments&&...) { + IndexBenchmark<6, MIN_MAX_ITER> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree10D_IT(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10, MIN_MAX_ITER> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D_IT(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20, MIN_MAX_ITER> benchmark{state}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size +BENCHMARK_CAPTURE(PhTree6D_FE, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree6D_IT, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree10D_IT, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D_IT, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/phtree/common/BUILD b/phtree/common/BUILD index 7ef3b6bf..35ba9029 100644 --- a/phtree/common/BUILD +++ b/phtree/common/BUILD @@ -11,6 +11,7 @@ cc_library( "distance.h", "filter.h", "flat_array_map.h", + "b_plus_tree_map.h", "flat_sparse_map.h", "tree_stats.h", ], @@ -99,6 +100,19 @@ cc_test( ], ) +cc_test( + name = "b_plus_tree_map_test", + timeout = "long", + srcs = [ + "b_plus_tree_map_test.cc", + ], + linkstatic = True, + deps = [ + ":common", + "//phtree/testing/gtest_main", + ], +) + cc_test( name = "flat_sparse_map_test", timeout = "long", diff --git a/phtree/common/b_plus_tree_map.h b/phtree/common/b_plus_tree_map.h new file mode 100644 index 00000000..ef2fb88f --- /dev/null +++ b/phtree/common/b_plus_tree_map.h @@ -0,0 +1,654 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_B_PLUS_TREE_H +#define PHTREE_COMMON_B_PLUS_TREE_H + +#include "bits.h" +#include +#include +#include + +/* + * PLEASE do not include this file directly, it is included via common.h. + * + * This file contains the B+tree implementation which is used in high-dimensional nodes in + * the PH-Tree. + */ +namespace improbable::phtree { + +/* + * The b_plus_tree_map is a B+tree implementation that uses a hierarchy of horizontally + * connected nodes for fast traversal through all entries. + * + * The individual nodes have at most M entries. + * The tree has O(log n) lookup and O(M log n) insertion/removal time complexity, + * space complexity is O(n). + * + * Tree structure: + * - Inner nodes: have other nodes as children; their key of an entry represents the highest + * key of any subnode in that entry + * - Leaf nodes: have values as children; their key represents the key of a key/value pair + * - Every node is either a leaf (l-node; contains values) or an inner node + * (n-node; contains nodes). + * - "Sibling" nodes refer to the nodes linked by prev_node_ or next_node_. Sibling nodes + * usually have the same parent but may also be children of their parent's siblings. + * + * - Guarantee: All leaf nodes are horizontally connected + * - Inner nodes may or may not be connected. Specifically: + * - New inner nodes will be assigned siblings from the same parent or the parent's sibling + * (if the new node is the first or last node in a parent) + * - There is no guarantee that inner nodes know about their potential sibling (=other inner + * nodes that own bordering values/child-nodes). + * - There is no guarantee that siblings are on the same depth of the tree. + * - The tree is not balanced + * + * TODO since this is a "map" (with 1:1 mapping of key:value), we could optimize splitting and + * merging by trying to reduce `dead space` + * (space between key1 and key2 that exceeds (key2 - key1)). + */ +template +class b_plus_tree_map { + class bpt_node_base; + template + class bpt_node_data; + class bpt_node_leaf; + class bpt_node_inner; + class bpt_iterator; + + using key_t = std::uint64_t; + + using bpt_entry_inner = std::pair; + using bpt_entry_leaf = std::pair; + + using IterT = bpt_iterator; + using NodeT = bpt_node_base; + using NLeafT = bpt_node_leaf; + using NInnerT = bpt_node_inner; + using LeafIteratorT = decltype(std::vector().begin()); + using TreeT = b_plus_tree_map; + + public: + explicit b_plus_tree_map() : root_{new NLeafT(nullptr, nullptr, nullptr)}, size_{0} {}; + + ~b_plus_tree_map() { + delete root_; + } + + [[nodiscard]] auto find(key_t key) noexcept { + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find(key); + if (node == nullptr) { + return end(); + } + } + return node->as_leaf()->find(key); + } + + [[nodiscard]] auto find(key_t key) const noexcept { + return const_cast(*this).find(key); + } + + [[nodiscard]] auto lower_bound(key_t key) noexcept { + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find(key); + if (node == nullptr) { + return end(); + } + } + return node->as_leaf()->lower_bound_as_iter(key); + } + + [[nodiscard]] auto begin() noexcept { + return IterT(root_); + } + + [[nodiscard]] auto begin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto cbegin() const noexcept { + return IterT(root_); + } + + [[nodiscard]] auto end() noexcept { + return IterT(); + } + + [[nodiscard]] auto end() const noexcept { + return IterT(); + } + + template + auto emplace(Args&&... args) { + return try_emplace_base(std::forward(args)...); + } + + template + auto try_emplace(key_t key, Args&&... args) { + return try_emplace_base(key, std::forward(args)...); + } + + void erase(key_t key) { + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find(key); + if (node == nullptr) { + return; + } + } + size_ -= node->as_leaf()->erase_key(key, *this); + } + + void erase(const IterT& iterator) { + assert(iterator != end()); + --size_; + iterator.node_->erase_it(iterator.iter_, *this); + } + + [[nodiscard]] size_t size() const noexcept { + return size_; + } + + void _check() { + size_t count = 0; + NLeafT* prev_leaf = nullptr; + key_t known_min = std::numeric_limits::max(); + root_->_check(count, nullptr, prev_leaf, known_min, 0); + assert(count == size()); + } + + private: + template + auto try_emplace_base(key_t key, Args&&... args) { + auto node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->find_or_last(key); + } + return node->as_leaf()->try_emplace(key, *this, size_, std::forward(args)...); + } + + class bpt_node_base { + public: + explicit bpt_node_base(bool is_leaf, NInnerT* parent) noexcept + : is_leaf_{is_leaf}, parent_{parent} {} + + virtual ~bpt_node_base() noexcept = default; + + [[nodiscard]] inline bool is_leaf() const noexcept { + return is_leaf_; + } + + [[nodiscard]] inline NInnerT* as_inner() noexcept { + assert(!is_leaf_); + return static_cast(this); + } + + [[nodiscard]] inline NLeafT* as_leaf() noexcept { + assert(is_leaf_); + return static_cast(this); + } + + virtual void _check(size_t&, NInnerT*, NLeafT*&, key_t&, key_t) = 0; + + public: + const bool is_leaf_; + NInnerT* parent_; + }; + + template + class bpt_node_data : public bpt_node_base { + using DataIteratorT = decltype(std::vector().begin()); + friend IterT; + + constexpr static size_t M_leaf = std::min(size_t(16), COUNT_MAX); + // Default MAX is 32. Special case for small COUNT with smaller inner leaf or + // trees with a single inner leaf. '*2' is added because leaf filling is not compact. + constexpr static size_t M_inner = std::min(size_t(16), COUNT_MAX / M_leaf * 2); + // TODO This could be improved but requires a code change to move > 1 entry when merging. + constexpr static size_t M_leaf_min = 2; // std::max((size_t)2, M_leaf >> 2); + constexpr static size_t M_inner_min = 2; // std::max((size_t)2, M_inner >> 2); + // There is no point in allocating more leaf space than the max amount of entries. + constexpr static size_t M_leaf_init = std::min(size_t(8), COUNT_MAX); + constexpr static size_t M_inner_init = 4; + + public: + explicit bpt_node_data(bool is_leaf, NInnerT* parent, ThisT* prev, ThisT* next) noexcept + : bpt_node_base(is_leaf, parent), data_{}, prev_node_{prev}, next_node_{next} { + data_.reserve(this->M_init()); + } + + virtual ~bpt_node_data() noexcept = default; + + [[nodiscard]] inline size_t M_min() { + return this->is_leaf_ ? M_leaf_min : M_inner_min; + } + + [[nodiscard]] inline size_t M_max() { + return this->is_leaf_ ? M_leaf : M_inner; + } + + [[nodiscard]] inline size_t M_init() { + return this->is_leaf_ ? M_leaf_init : M_inner_init; + } + + [[nodiscard]] auto lower_bound(key_t key) noexcept { + return std::lower_bound( + data_.begin(), data_.end(), key, [](EntryT& left, const key_t key) { + return left.first < key; + }); + } + + [[nodiscard]] size_t size() const noexcept { + return data_.size(); + } + + void erase_entry(DataIteratorT it_to_erase, TreeT& tree) { + auto& parent_ = this->parent_; + key_t max_key_old = data_.back().first; + + data_.erase(it_to_erase); + if (parent_ == nullptr) { + if constexpr (std::is_same_v) { + if (data_.size() < 2) { + auto remaining_node = data_.begin()->second; + data_.begin()->second = nullptr; + remaining_node->parent_ = nullptr; + tree.root_ = remaining_node; + delete this; + } + } + return; + } + + if (data_.empty()) { + // Nothing to merge, just remove node. This should be rare, i.e. only happens when + // a rare 1-entry node has its last entry removed. + remove_from_siblings(); + parent_->remove_node(max_key_old, tree); + return; + } + + if (data_.size() < this->M_min()) { + // merge + if (prev_node_ != nullptr && prev_node_->data_.size() < this->M_max()) { + remove_from_siblings(); + auto& prev_data = prev_node_->data_; + if constexpr (std::is_same_v) { + prev_data.emplace_back(std::move(data_[0])); + } else { + data_[0].second->parent_ = prev_node_; + prev_data.emplace_back(std::move(data_[0])); + data_[0].second = nullptr; + } + auto prev_node = prev_node_; // create copy because (this) will be deleted + parent_->remove_node(max_key_old, tree); + if (prev_node->parent_ != nullptr) { + key_t old1 = (prev_data.end() - 2)->first; + key_t new1 = (prev_data.end() - 1)->first; + prev_node->parent_->update_key(old1, new1); + } + return; + } else if (next_node_ != nullptr && next_node_->data_.size() < this->M_max()) { + remove_from_siblings(); + auto& next_data = next_node_->data_; + if constexpr (std::is_same_v) { + next_data.emplace(next_data.begin(), std::move(data_[0])); + } else { + data_[0].second->parent_ = next_node_; + next_data.emplace(next_data.begin(), std::move(data_[0])); + data_[0].second = nullptr; + } + parent_->remove_node(max_key_old, tree); + return; + } + // This node is too small but there is nothing we can do. + } + if (it_to_erase == data_.end()) { + parent_->update_key(max_key_old, data_.back().first); + } + } + + auto prepare_emplace(key_t key, TreeT& tree, DataIteratorT& it_in_out) { + if (data_.size() < this->M_max()) { + if (this->parent_ != nullptr && key > data_.back().first) { + this->parent_->update_key(data_.back().first, key); + } + return static_cast(this); + } + + ThisT* dest = this->split_node(key, tree); + if (dest != this) { + // The insertion pos in node2 can be calculated: + auto old_pos = it_in_out - data_.begin(); + it_in_out = dest->data_.begin() + old_pos - data_.size(); + } + return dest; + } + + void _check_data(NInnerT* parent, key_t known_max) { + (void)parent; + (void)known_max; + // assert(parent_ == nullptr || data_.size() >= M_min); + assert(this->parent_ == parent); + if (this->data_.empty()) { + assert(parent == nullptr); + return; + } + assert(this->parent_ == nullptr || known_max == this->data_.back().first); + } + + private: + ThisT* split_node(key_t key, TreeT& tree) { + auto max_key = data_.back().first; + if (this->parent_ == nullptr) { + auto* new_parent = new NInnerT(nullptr, nullptr, nullptr); + new_parent->emplace_back(max_key, this); + tree.root_ = new_parent; + this->parent_ = new_parent; + } + + // create new node + auto* node2 = new ThisT(this->parent_, static_cast(this), next_node_); + if (next_node_ != nullptr) { + next_node_->prev_node_ = node2; + } + next_node_ = node2; + + // populate new node + // TODO Optimize populating new node: move 1st part, insert new value, move 2nd part...? + auto split_pos = this->M_max() >> 1; + node2->data_.insert( + node2->data_.end(), + std::make_move_iterator(data_.begin() + split_pos), + std::make_move_iterator(data_.end())); + data_.erase(data_.begin() + split_pos, data_.end()); + + if constexpr (std::is_same_v) { + for (auto& e : node2->data_) { + e.second->parent_ = node2; + } + } + + // Add node to parent + auto split_key = data_[split_pos - 1].first; + if (key > split_key && key < node2->data_[0].first) { + // This is a bit hacky: + // Add new entry at END of first node when possible -> avoids some shifting + split_key = key; + } + this->parent_->update_key_and_add_node( + max_key, split_key, std::max(max_key, key), node2, tree); + + // Return node for insertion of new value + return key > split_key ? node2 : static_cast(this); + } + + void remove_from_siblings() { + if (next_node_ != nullptr) { + next_node_->prev_node_ = prev_node_; + } + if (prev_node_ != nullptr) { + prev_node_->next_node_ = next_node_; + } + } + + protected: + std::vector data_; + ThisT* prev_node_; + ThisT* next_node_; + }; + + class bpt_node_leaf : public bpt_node_data { + public: + explicit bpt_node_leaf(NInnerT* parent, NLeafT* prev, NLeafT* next) noexcept + : bpt_node_data(true, parent, prev, next) {} + + ~bpt_node_leaf() noexcept = default; + + [[nodiscard]] IterT find(key_t key) noexcept { + auto it = this->lower_bound(key); + if (it != this->data_.end() && it->first == key) { + return IterT(this, it); + } + return IterT(); + } + + [[nodiscard]] IterT lower_bound_as_iter(key_t key) noexcept { + auto it = this->lower_bound(key); + if (it != this->data_.end()) { + return IterT(this, it); + } + return IterT(); + } + + template + auto try_emplace(key_t key, TreeT& tree, size_t& entry_count, Args&&... args) { + auto it = this->lower_bound(key); + if (it != this->data_.end() && it->first == key) { + return std::make_pair(it, false); + } + ++entry_count; + + auto dest = this->prepare_emplace(key, tree, it); + + auto x = dest->data_.emplace( + it, + std::piecewise_construct, + std::forward_as_tuple(key), + std::forward_as_tuple(std::forward(args)...)); + return std::make_pair(x, true); + } + + bool erase_key(key_t key, TreeT& tree) { + auto it = this->lower_bound(key); + if (it != this->data_.end() && it->first == key) { + this->erase_entry(it, tree); + return true; + } + return false; + } + + void erase_it(LeafIteratorT iter, TreeT& tree) { + this->erase_entry(iter, tree); + } + + void _check( + size_t& count, NInnerT* parent, NLeafT*& prev_leaf, key_t& known_min, key_t known_max) { + this->_check_data(parent, known_max); + + assert(prev_leaf == this->prev_node_); + for (auto& e : this->data_) { + assert(count == 0 || e.first > known_min); + assert(this->parent_ == nullptr || e.first <= known_max); + ++count; + known_min = e.first; + } + prev_leaf = this; + } + }; + + class bpt_node_inner : public bpt_node_data { + public: + explicit bpt_node_inner(NInnerT* parent, NInnerT* prev, NInnerT* next) noexcept + : bpt_node_data(false, parent, prev, next) {} + + ~bpt_node_inner() noexcept { + for (auto& e : this->data_) { + if (e.second != nullptr) { + delete e.second; + } + } + } + + [[nodiscard]] NodeT* find(key_t key) noexcept { + auto it = this->lower_bound(key); + return it != this->data_.end() ? it->second : nullptr; + } + + [[nodiscard]] NodeT* find_or_last(key_t key) noexcept { + auto it = this->lower_bound(key); + return it != this->data_.end() ? it->second : this->data_.back().second; + } + + void emplace_back(key_t key, NodeT* node) { + this->data_.emplace_back(key, node); + } + + void _check( + size_t& count, NInnerT* parent, NLeafT*& prev_leaf, key_t& known_min, key_t known_max) { + this->_check_data(parent, known_max); + + assert(this->parent_ == nullptr || known_max == this->data_.back().first); + auto prev_key = this->data_[0].first; + int n = 0; + for (auto& e : this->data_) { + assert(n == 0 || e.first > prev_key); + e.second->_check(count, this, prev_leaf, known_min, e.first); + assert(this->parent_ == nullptr || e.first <= known_max); + prev_key = e.first; + ++n; + } + } + + void update_key(key_t old_key, key_t new_key) { + assert(new_key != old_key); + auto it = this->lower_bound(old_key); + assert(it != this->data_.end()); + assert(it->first == old_key); + it->first = new_key; + if (this->parent_ != nullptr && ++it == this->data_.end()) { + this->parent_->update_key(old_key, new_key); + } + } + + /* + * This method does two things: + * - It changes the key of the node (node 1) at 'key1_old' to 'key1_new'. + * - It inserts a new node (node 2) after 'new_key1' with value 'key2' + * Invariants: + * - Node1: key1_old > key1_new; Node 1 vs 2: key2 > new_key1 + */ + void update_key_and_add_node( + key_t key1_old, key_t key1_new, key_t key2, NodeT* child2, TreeT& tree) { + assert(key2 > key1_new); + assert(key1_old >= key1_new); + auto it2 = this->lower_bound(key1_old) + 1; + + auto dest = this->prepare_emplace(key2, tree, it2); + // prepare_emplace() guarantees that child2 is in the same node as child1 + assert(it2 != dest->data_.begin()); + (it2 - 1)->first = key1_new; + child2->parent_ = dest; + dest->data_.emplace(it2, key2, child2); + } + + void remove_node(key_t key_remove, TreeT& tree) { + auto it_to_erase = this->lower_bound(key_remove); + delete it_to_erase->second; + this->erase_entry(it_to_erase, tree); + } + }; + + class bpt_iterator { + using EntryT = typename b_plus_tree_map::bpt_entry_leaf; + friend b_plus_tree_map; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using reference = T&; + + // Arbitrary position iterator + explicit bpt_iterator(NLeafT* node, LeafIteratorT it) noexcept : node_{node}, iter_{it} { + assert(node->is_leaf_ && "just for consistency, insist that we iterate leaves only "); + } + + // begin() iterator + explicit bpt_iterator(NodeT* node) noexcept { + assert(node->parent_ == nullptr && "must start with root node"); + // move iterator to first value + while (!node->is_leaf_) { + node = node->as_inner()->data_[0].second; + } + node_ = node->as_leaf(); + + if (node_->size() == 0) { + node_ = nullptr; + iter_ = {}; + return; + } + iter_ = node_->data_.begin(); + } + + // end() iterator + bpt_iterator() noexcept : node_{nullptr}, iter_{} {} + + auto& operator*() const noexcept { + assert(AssertNotEnd()); + return const_cast(*iter_); + } + + auto* operator->() const noexcept { + assert(AssertNotEnd()); + return const_cast(&*iter_); + } + + auto& operator++() noexcept { + assert(AssertNotEnd()); + ++iter_; + if (iter_ == node_->data_.end()) { + // this may be a nullptr -> end of data + node_ = node_->next_node_; + iter_ = node_ != nullptr ? node_->data_.begin() : LeafIteratorT{}; + } + return *this; + } + + auto operator++(int) noexcept { + IterT iterator(*this); + ++(*this); + return iterator; + } + + friend bool operator==(const IterT& left, const IterT& right) noexcept { + return left.iter_ == right.iter_ && left.node_ == right.node_; + } + + friend bool operator!=(const IterT& left, const IterT& right) noexcept { + return !(left == right); + } + + private: + [[nodiscard]] inline bool AssertNotEnd() const noexcept { + return node_ != nullptr; + } + + NLeafT* node_; + LeafIteratorT iter_; + }; + + private: + NodeT* root_; + size_t size_; +}; +} // namespace improbable::phtree + +#endif // PHTREE_COMMON_B_PLUS_TREE_H diff --git a/phtree/common/b_plus_tree_map_test.cc b/phtree/common/b_plus_tree_map_test.cc new file mode 100644 index 00000000..7d9e0bb5 --- /dev/null +++ b/phtree/common/b_plus_tree_map_test.cc @@ -0,0 +1,185 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "b_plus_tree_map.h" +#include +#include + +using namespace improbable::phtree; + +TEST(PhTreeFlatSparseMapTest, SmokeTest) { + const int max_size = 200; + + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.emplace(val, val); + test_map._check(); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeFlatSparseMapTest, SmokeTestWithTryEmplace) { + const int max_size = 200; + + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + } + } +} + +TEST(PhTreeFlatSparseMapTest, SmokeTestWithErase) { + const int max_size = 200; + + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map{}; + std::unordered_map reference_map{}; + std::vector key_list{}; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + key_list.emplace_back(val); + } + } + + std::shuffle(key_list.begin(), key_list.end(), random_engine); + for (auto key : key_list) { + if (key % 2 == 0) { + test_map.erase(key); + } else { + auto it = test_map.find(key); + ASSERT_NE(it, test_map.end()); + ASSERT_EQ(it->second, key); + test_map.erase(it); + } + test_map._check(); + reference_map.erase(key); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.find(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.find(v)->second; + ASSERT_EQ(vMap, vRef); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + } + } +} + +TEST(PhTreeFlatSparseMapTest, SmokeTestLowerBound) { + const int max_size = 200; + + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, max_size - 1); + + for (int i = 0; i < 10; i++) { + b_plus_tree_map test_map; + std::map reference_map; + for (int j = 0; j < 2 * max_size; j++) { + size_t val = cube_distribution(random_engine); + bool hasVal = test_map.find(val) != test_map.end(); + bool hasValRef = reference_map.find(val) != reference_map.end(); + ASSERT_EQ(hasVal, hasValRef); + if (!hasVal) { + reference_map.emplace(val, val); + test_map.try_emplace(val, val); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + for (auto it : reference_map) { + size_t vRef = it.first; + size_t vMap = test_map.lower_bound(vRef)->second; + ASSERT_EQ(vMap, vRef); + } + for (auto it : test_map) { + size_t v = it.first; + size_t vRef = reference_map.find(v)->second; + size_t vMap = test_map.lower_bound(v)->second; + ASSERT_EQ(vMap, vRef); + } + for (size_t v = 0; v < max_size + 5; ++v) { + auto itRef = reference_map.lower_bound(v); + auto itMap = test_map.lower_bound(v); + if (itRef == reference_map.end()) { + ASSERT_EQ(itMap, test_map.end()); + } else { + ASSERT_NE(itMap, test_map.end()); + // ASSERT_EQ(v, itRef->second); + ASSERT_EQ(itRef->second, itMap->second); + } + } + } + } +} diff --git a/phtree/common/common.h b/phtree/common/common.h index 2912c8ec..ce6fd286 100644 --- a/phtree/common/common.h +++ b/phtree/common/common.h @@ -23,6 +23,7 @@ #include "distance.h" #include "filter.h" #include "flat_array_map.h" +#include "b_plus_tree_map.h" #include "flat_sparse_map.h" #include "tree_stats.h" #include diff --git a/phtree/common/flat_sparse_map.h b/phtree/common/flat_sparse_map.h index 3c264223..6f588982 100644 --- a/phtree/common/flat_sparse_map.h +++ b/phtree/common/flat_sparse_map.h @@ -46,7 +46,9 @@ using index_t = std::int32_t; template class sparse_map { public: - explicit sparse_map() : data_{} {}; + explicit sparse_map() : data_{} { + data_.reserve(4); + } [[nodiscard]] auto find(size_t key) { auto it = lower_bound(key); diff --git a/phtree/phtree_test.cc b/phtree/phtree_test.cc index 42b4e78d..126943ba 100644 --- a/phtree/phtree_test.cc +++ b/phtree/phtree_test.cc @@ -57,6 +57,13 @@ static void reset_id_counters() { destruct_count_ = 0; } +static void print_id_counters() { + std::cout << "dc=" << default_construct_count_ << " c=" << construct_count_ + << " cc=" << copy_construct_count_ << " mc=" << move_construct_count_ + << " ca=" << copy_assign_count_ << " ma=" << move_assign_count_ + << " d=" << destruct_count_ << std::endl; +} + struct Id { Id() : _i{0} { ++default_construct_count_; @@ -64,7 +71,7 @@ struct Id { explicit Id(const size_t i) : _i{static_cast(i)} { ++construct_count_; - }; + } Id(const Id& other) { ++copy_construct_count_; @@ -76,15 +83,12 @@ struct Id { _i = other._i; } -// Id& operator=(const Id& other) = default; -// Id& operator=(Id&& other) = default; - Id& operator=(const Id& other) noexcept { ++copy_assign_count_; _i = other._i; return *this; } - Id& operator=(Id&& other) noexcept { + Id& operator=(Id&& other) noexcept { ++move_assign_count_; _i = other._i; return *this; @@ -188,7 +192,7 @@ void SmokeTestBasicOps(size_t N) { ASSERT_EQ(id._i, tree.find(p)->_i); ASSERT_EQ(i + 1, tree.size()); - // try add again + // try insert/emplace again ASSERT_FALSE(tree.insert(p, id).second); ASSERT_FALSE(tree.emplace(p, id).second); ASSERT_EQ(tree.count(p), 1); @@ -251,7 +255,10 @@ void SmokeTestBasicOps(size_t N) { // small node require a lot of copying/moving ASSERT_GE(construct_count_ * 3, move_construct_count_); } else { - ASSERT_GE(construct_count_ * 2, move_construct_count_); + if (construct_count_ * 15 < move_construct_count_) { + print_id_counters(); + } + ASSERT_GE(construct_count_ * 15, move_construct_count_); } } diff --git a/phtree/v16/node.h b/phtree/v16/node.h index 36c04f90..d96502f3 100644 --- a/phtree/v16/node.h +++ b/phtree/v16/node.h @@ -26,21 +26,26 @@ namespace improbable::phtree::v16 { /* - * We provide different implementations of the node's internal entry set: + * We provide different implementations of the node's internal entry set. + * All implementations are equivalent to "std::map" which can be used as + * a plugin example for verification. + * * - `array_map` is the fastest, but has O(2^DIM) space complexity. This can be very wasteful * because many nodes may have only 2 entries. * Also, iteration depends on some bit operations and is also O(DIM) per step if the CPU/compiler * does not support CTZ (count trailing bits). * - `sparse_map` is slower, but requires only O(n) memory (n = number of entries/children). * However, insertion/deletion is O(n), i.e. O(2^DIM) time complexity in the worst case. - * - 'std::map` is the least efficient for small node sizes but scales best with larger nodes and - * dimensionality. Remember that n_max = 2^DIM. + * - 'b_plus_tree_map` is the least efficient for small node sizes but scales best with larger + * nodes and dimensionality. Remember that n_max = 2^DIM. */ template using EntryMap = typename std::conditional< DIM <= 3, array_map, - typename std::conditional, std::map>::type>::type; + typename std:: + conditional, b_plus_tree_map>:: + type>::type; template using EntryIterator = decltype(EntryMap().begin()); From 039da736dc92e72440c62901a658c1e9bb280e81 Mon Sep 17 00:00:00 2001 From: Tilmann Date: Fri, 1 Apr 2022 14:17:56 +0200 Subject: [PATCH 11/16] Avoid std::uint16_t (#17) --- CHANGELOG.md | 5 +++++ phtree/common/base_types.h | 6 ++++-- phtree/common/filter.h | 4 ++-- phtree/v16/entry.h | 14 ++++++++++---- phtree/v16/iterator_knn_hs.h | 6 +++--- 5 files changed, 24 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a4fb982..582bad93 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Changed +- Potentially **BREAKING CHANGE** when using `IsNodeValid()` in provided filters: + Changed `bit_width_t` from `uin16_t` to `uint32_t`. This improves performance of 3D insert/emplace + on small datasets by up to 15%. To avoid warnings that meant that the API of `FilterAABB` and `FilterSphere` + had to be changed to accept `uint32_t` instead of `int`. This may break some implementations. + [#17](https://github.com/tzaeschke/phtree-cpp/pull/17) - DIM>8 now uses custom b_plus_tree_map instead of std::map. This improves performance for all operations, e.g. window queries on large datasets are up to 4x faster. Benchmarks results can be found in the issue. [#14](https://github.com/tzaeschke/phtree-cpp/issues/14) diff --git a/phtree/common/base_types.h b/phtree/common/base_types.h index 5ad77ea2..5f840f84 100644 --- a/phtree/common/base_types.h +++ b/phtree/common/base_types.h @@ -40,8 +40,10 @@ using scalar_64_t = int64_t; using scalar_32_t = int32_t; using scalar_16_t = int16_t; -// Bits in a coordinate (usually a double or long has 64 bits, so uint_8 suffices) -using bit_width_t = uint16_t; +// Bits in a coordinate (usually a double or long has 64 bits, so uint_8 suffices). +// However, uint32_t turned out to be faster, probably due to fewer cycles required for 32bit +// instructions (8bit/16bit tend to require more cycles, see CPU tables available on the web). +using bit_width_t = uint32_t; // Number of bit for 'scalar_64_t' or 'scalar_32_t'. Note that 'digits' does _not_ include sign bit, // so e.g. int64_t has 63 `digits`, however we need all bits, i.e. 64. template diff --git a/phtree/common/filter.h b/phtree/common/filter.h index 46eacee3..3a3e30f0 100644 --- a/phtree/common/filter.h +++ b/phtree/common/filter.h @@ -126,7 +126,7 @@ class FilterAABB { return true; } - [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) const { + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { // Let's assume that we always want to traverse the root node (bits_to_ignore == 64) if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { return true; @@ -187,7 +187,7 @@ class FilterSphere { * Calculate whether AABB encompassing all possible points in the node intersects with the * sphere. */ - [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) const { + [[nodiscard]] bool IsNodeValid(const KeyInternal& prefix, std::uint32_t bits_to_ignore) const { // we always want to traverse the root node (bits_to_ignore == 64) if (bits_to_ignore >= (MAX_BIT_WIDTH - 1)) { diff --git a/phtree/v16/entry.h b/phtree/v16/entry.h index 0e4b744e..ad6d3637 100644 --- a/phtree/v16/entry.h +++ b/phtree/v16/entry.h @@ -55,13 +55,19 @@ class Entry { * Construct entry with existing node. */ Entry(const KeyT& k, std::unique_ptr&& node_ptr, bit_width_t postfix_len) noexcept - : kd_key_{k}, node_{std::move(node_ptr)}, union_type_{NODE}, postfix_len_{postfix_len} {} + : kd_key_{k} + , node_{std::move(node_ptr)} + , union_type_{NODE} + , postfix_len_{static_cast(postfix_len)} {} /* * Construct entry with a new node. */ Entry(bit_width_t postfix_len) noexcept - : kd_key_(), node_{std::make_unique()}, union_type_{NODE}, postfix_len_{postfix_len} {} + : kd_key_() + , node_{std::make_unique()} + , union_type_{NODE} + , postfix_len_{static_cast(postfix_len)} {} /* * Construct entry with existing T. @@ -125,7 +131,7 @@ class Entry { } void SetNode(std::unique_ptr&& node, bit_width_t postfix_len) noexcept { - postfix_len_ = postfix_len; + postfix_len_ = static_cast(postfix_len); DestroyUnion(); union_type_ = NODE; new (&node_) std::unique_ptr{std::move(node)}; @@ -204,7 +210,7 @@ class Entry { // prefix_len + 1 + postfix_len = 64. // The '+1' accounts for the 1 bit that is represented by the local node's hypercube, // i.e. the same bit that is used to create the lookup keys in entries_. - alignas(2) bit_width_t postfix_len_; + alignas(2) std::uint16_t postfix_len_; }; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/iterator_knn_hs.h b/phtree/v16/iterator_knn_hs.h index 2dc7aab0..ddf5bfc1 100644 --- a/phtree/v16/iterator_knn_hs.h +++ b/phtree/v16/iterator_knn_hs.h @@ -128,7 +128,7 @@ class IteratorKnnHS : public IteratorBase { current_distance_ = std::numeric_limits::max(); } - double DistanceToNode(const KeyInternal& prefix, int bits_to_ignore) { + double DistanceToNode(const KeyInternal& prefix, std::uint32_t bits_to_ignore) { assert(bits_to_ignore < MAX_BIT_WIDTH); SCALAR mask_min = MAX_MASK << bits_to_ignore; SCALAR mask_max = ~mask_min; @@ -153,8 +153,8 @@ class IteratorKnnHS : public IteratorBase { double current_distance_; std::priority_queue, CompareEntryDistByDistance> queue_; - int num_found_results_; - int num_requested_results_; + size_t num_found_results_; + size_t num_requested_results_; DISTANCE distance_; }; From 8732b56342225522852d26e4a7164f1eefb836ba Mon Sep 17 00:00:00 2001 From: Tilmann Date: Mon, 4 Apr 2022 15:46:28 +0200 Subject: [PATCH 12/16] moveable PhTree (#20) --- CHANGELOG.md | 1 + phtree/phtree.h | 11 ++- phtree/phtree_multimap.h | 16 ++++- phtree/phtree_multimap_d_test.cc | 70 +++++++++++++++++++ phtree/phtree_test.cc | 111 +++++++++++++++++++++++++++++++ phtree/v16/for_each.h | 6 +- phtree/v16/for_each_hc.h | 6 +- phtree/v16/iterator_base.h | 15 ++--- phtree/v16/iterator_full.h | 2 +- phtree/v16/iterator_hc.h | 2 +- phtree/v16/iterator_knn_hs.h | 4 +- phtree/v16/iterator_simple.h | 4 +- phtree/v16/phtree_v16.h | 14 ++-- 13 files changed, 235 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 582bad93..0245af7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Changed +- Make PhTree and PhTreeMultimap moveable (move-assign/copy). [#18](https://github.com/tzaeschke/phtree-cpp/issues/18) - Potentially **BREAKING CHANGE** when using `IsNodeValid()` in provided filters: Changed `bit_width_t` from `uin16_t` to `uint32_t`. This improves performance of 3D insert/emplace on small datasets by up to 15%. To avoid warnings that meant that the API of `FilterAABB` and `FilterSphere` diff --git a/phtree/phtree.h b/phtree/phtree.h index 54dfd2dd..8250e96a 100644 --- a/phtree/phtree.h +++ b/phtree/phtree.h @@ -32,7 +32,6 @@ namespace improbable::phtree { template > class PhTree { friend PhTreeDebugHelper; - using KeyInternal = typename CONVERTER::KeyInternal; using QueryBox = typename CONVERTER::QueryBoxExternal; using Key = typename CONVERTER::KeyExternal; static constexpr dimension_t DimInternal = CONVERTER::DimInternal; @@ -42,7 +41,15 @@ class PhTree { typename std::conditional<(DIM == DimInternal), QueryPoint, QueryIntersect>::type; public: - explicit PhTree(CONVERTER converter = CONVERTER()) : tree_{converter}, converter_{converter} {} + template + explicit PhTree(CONVERTER2&& converter = CONVERTER()) + : tree_{&converter_}, converter_{converter} {} + + PhTree(const PhTree& other) = delete; + PhTree& operator=(const PhTree& other) = delete; + PhTree(PhTree&& other) noexcept = default; + PhTree& operator=(PhTree&& other) noexcept = default; + ~PhTree() noexcept = default; /* * Attempts to build and insert a key and a value into the tree. diff --git a/phtree/phtree_multimap.h b/phtree/phtree_multimap.h index 75540f9f..af45bb8b 100644 --- a/phtree/phtree_multimap.h +++ b/phtree/phtree_multimap.h @@ -236,7 +236,21 @@ class PhTreeMultiMap { using EndType = decltype(std::declval>().end()); explicit PhTreeMultiMap(CONVERTER converter = CONVERTER()) - : tree_{converter}, converter_{converter}, size_{0} {} + : tree_{&converter_}, converter_{converter}, size_{0} {} + + PhTreeMultiMap(const PhTreeMultiMap& other) = delete; + PhTreeMultiMap& operator=(const PhTreeMultiMap& other) = delete; + PhTreeMultiMap(PhTreeMultiMap&& other) noexcept = default; + // PhTreeMultiMap& operator=(PhTreeMultiMap&& other) noexcept = default; + PhTreeMultiMap& operator=(PhTreeMultiMap&& other) noexcept { + tree_ = std::move(other.tree_); + converter_ = std::move(other.converter_); + //the_end_ = std::move(other.the_end_); // TODO THis works, but it is pretty dirty! + bucket_dummy_end_ = std::move(other.bucket_dummy_end_); + size_ = std::move(other.size_); + return *this; + } + ~PhTreeMultiMap() noexcept = default; /* * Attempts to build and insert a key and a value into the tree. diff --git a/phtree/phtree_multimap_d_test.cc b/phtree/phtree_multimap_d_test.cc index d695ec91..c39758fc 100644 --- a/phtree/phtree_multimap_d_test.cc +++ b/phtree/phtree_multimap_d_test.cc @@ -1102,3 +1102,73 @@ TEST(PhTreeMMDTest, SmokeTestTreeAPI) { treePtr.clear(); delete idPtr; } + +template +void test_tree(TREE& tree) { + PhPointD<3> p{1, 2, 3}; + + // test various operations + tree.emplace(p, Id{2}); + Id id3{3}; + tree.insert(p, id3); + ASSERT_EQ(tree.size(), 3); + ASSERT_EQ(tree.find(p)->_i, 3); + + auto q_window = tree.begin_query({p, p}); + ASSERT_EQ(3, q_window->_i); + ++q_window; + ASSERT_EQ(2, q_window->_i); + ++q_window; + ASSERT_EQ(1, q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(3, q_extent->_i); + ++q_extent; + ASSERT_EQ(2, q_extent->_i); + ++q_extent; + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + ASSERT_EQ(3, q_knn->_i); + ++q_knn; + ASSERT_EQ(2, q_knn->_i); + ++q_knn; + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree.erase(p, Id{1})); + ASSERT_EQ(2, tree.size()); + ASSERT_EQ(0, tree.erase(p, Id{1})); + ASSERT_EQ(2, tree.size()); + ASSERT_EQ(1, tree.erase(p, Id{2})); + ASSERT_EQ(1, tree.erase(p, Id{3})); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeTest, TestMoveConstruct) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + PhTreeMultiMapD<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{std::move(tree1)}; + test_tree(tree); + tree.~PhTreeMultiMap(); +} + +TEST(PhTreeTest, TestMoveAssign) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + PhTreeMultiMapD<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{}; + tree = std::move(tree1); + test_tree(tree); + tree.~PhTreeMultiMap(); +} \ No newline at end of file diff --git a/phtree/phtree_test.cc b/phtree/phtree_test.cc index 126943ba..34fae600 100644 --- a/phtree/phtree_test.cc +++ b/phtree/phtree_test.cc @@ -1077,3 +1077,114 @@ TEST(PhTreeTest, SmokeTestPoint1) { ASSERT_EQ(0, tree.size()); ASSERT_TRUE(tree.empty()); } + +template +void test_tree(TREE& tree) { + PhPoint<3> p{1, 2, 3}; + + // test various operations + tree.emplace(p, Id{2}); // already exists + Id id3{3}; + tree.insert(p, id3); // already exists + ASSERT_EQ(tree.size(), 1); + ASSERT_EQ(tree.find(p).second()._i, 1); + ASSERT_EQ(tree[p]._i, 1); + + auto q_window = tree.begin_query({p, p}); + ASSERT_EQ(1, q_window->_i); + ++q_window; + ASSERT_EQ(q_window, tree.end()); + + auto q_extent = tree.begin(); + ASSERT_EQ(1, q_extent->_i); + ++q_extent; + ASSERT_EQ(q_extent, tree.end()); + + auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>()); + ASSERT_EQ(1, q_knn->_i); + ++q_knn; + ASSERT_EQ(q_knn, tree.end()); + + ASSERT_EQ(1, tree.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_EQ(0, tree.erase(p)); + ASSERT_EQ(0, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeTest, TestMoveConstruct) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{std::move(tree1)}; + test_tree(tree); + tree.~PhTree(); +} + +TEST(PhTreeTest, TestMoveAssign) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + TestTree<3, Id> tree1; + tree1.emplace(p, Id{1}); + + TestTree<3, Id> tree{}; + tree = std::move(tree1); + test_tree(tree); + tree.~PhTree(); +} + +size_t count_pre{0}; +size_t count_post{0}; +size_t count_query{0}; + +template +struct DebugConverterNoOp : public ConverterPointBase { + using BASE = ConverterPointBase; + using Point = typename BASE::KeyExternal; + using PointInternal = typename BASE::KeyInternal; + + constexpr const PointInternal& pre(const Point& point) const { + ++count_pre; + ++const_cast(count_pre_local); + return point; + } + + constexpr const Point& post(const PointInternal& point) const { + ++count_post; + ++const_cast(count_post_local); + return point; + } + + constexpr const PhBox& pre_query(const PhBox& box) const { + ++count_query; + ++const_cast(count_query_local); + return box; + } + + size_t count_pre_local{0}; + size_t count_post_local{0}; + size_t count_query_local{0}; +}; + +TEST(PhTreeTest, TestMoveAssignCustomConverter) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + auto converter = DebugConverterNoOp<3>(); + auto tree1 = PhTree<3, Id, DebugConverterNoOp<3>>(converter); + tree1.emplace(p, Id{1}); + ASSERT_GE(tree1.converter().count_pre_local, 1); + ASSERT_EQ(tree1.converter().count_pre_local, count_pre); + + PhTree<3, Id, DebugConverterNoOp<3>> tree{}; + tree = std::move(tree1); + // Assert that converter got moved (or copied?): + ASSERT_GE(tree.converter().count_pre_local, 1); + ASSERT_EQ(tree.converter().count_pre_local, count_pre); + + test_tree(tree); + ASSERT_GE(tree.converter().count_pre_local, 2); + ASSERT_EQ(tree.converter().count_pre_local, count_pre); + tree.~PhTree(); +} \ No newline at end of file diff --git a/phtree/v16/for_each.h b/phtree/v16/for_each.h index 2531f70e..29706187 100644 --- a/phtree/v16/for_each.h +++ b/phtree/v16/for_each.h @@ -34,7 +34,7 @@ class ForEach { using EntryT = Entry; public: - ForEach(const CONVERT& converter, CALLBACK_FN& callback, FILTER filter) + ForEach(const CONVERT* converter, CALLBACK_FN& callback, FILTER filter) : converter_{converter}, callback_{callback}, filter_(std::move(filter)) {} void Traverse(const EntryT& entry) { @@ -52,13 +52,13 @@ class ForEach { } else { T& value = child.GetValue(); if (filter_.IsEntryValid(child_key, value)) { - callback_(converter_.post(child_key), value); + callback_(converter_->post(child_key), value); } } } } - CONVERT converter_; + const CONVERT* converter_; CALLBACK_FN& callback_; FILTER filter_; }; diff --git a/phtree/v16/for_each_hc.h b/phtree/v16/for_each_hc.h index 46556f1e..01d6e89b 100644 --- a/phtree/v16/for_each_hc.h +++ b/phtree/v16/for_each_hc.h @@ -44,7 +44,7 @@ class ForEachHC { ForEachHC( const KeyInternal& range_min, const KeyInternal& range_max, - const CONVERT& converter, + const CONVERT* converter, CALLBACK_FN& callback, FILTER filter) : range_min_{range_min} @@ -77,7 +77,7 @@ class ForEachHC { T& value = child.GetValue(); if (IsInRange(child_key, range_min_, range_max_) && filter_.IsEntryValid(child_key, value)) { - callback_(converter_.post(child_key), value); + callback_(converter_->post(child_key), value); } } } @@ -168,7 +168,7 @@ class ForEachHC { const KeyInternal range_min_; const KeyInternal range_max_; - CONVERT converter_; + const CONVERT* converter_; CALLBACK_FN& callback_; FILTER filter_; }; diff --git a/phtree/v16/iterator_base.h b/phtree/v16/iterator_base.h index 8fcd6eea..aaeb9101 100644 --- a/phtree/v16/iterator_base.h +++ b/phtree/v16/iterator_base.h @@ -38,7 +38,7 @@ class IteratorBase { friend PhTreeV16; public: - explicit IteratorBase(const CONVERT& converter) + explicit IteratorBase(const CONVERT* converter) : current_result_{nullptr} , current_node_{} , parent_node_{} @@ -46,7 +46,7 @@ class IteratorBase { , converter_{converter} , filter_{FILTER()} {} - explicit IteratorBase(const CONVERT& converter, FILTER filter) + explicit IteratorBase(const CONVERT* converter, FILTER filter) : current_result_{nullptr} , current_node_{} , parent_node_{} @@ -85,7 +85,7 @@ class IteratorBase { } auto first() const { - return converter_.post(current_result_->GetKey()); + return converter_->post(current_result_->GetKey()); } T& second() const { @@ -107,9 +107,8 @@ class IteratorBase { } [[nodiscard]] bool ApplyFilter(const EntryT& entry) const { - return entry.IsNode() - ? filter_.IsNodeValid(entry.GetKey(), entry.GetNodePostfixLen() + 1) - : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); + return entry.IsNode() ? filter_.IsNodeValid(entry.GetKey(), entry.GetNodePostfixLen() + 1) + : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); } void SetCurrentResult(const EntryT* current_result) { @@ -127,7 +126,7 @@ class IteratorBase { } auto post(const KeyInternal& point) { - return converter_.post(point); + return converter_->post(point); } private: @@ -147,7 +146,7 @@ class IteratorBase { const EntryT* current_node_; const EntryT* parent_node_; bool is_finished_; - const CONVERT& converter_; + const CONVERT* converter_; FILTER filter_; }; diff --git a/phtree/v16/iterator_full.h b/phtree/v16/iterator_full.h index b60be035..6be55af9 100644 --- a/phtree/v16/iterator_full.h +++ b/phtree/v16/iterator_full.h @@ -33,7 +33,7 @@ class IteratorFull : public IteratorBase { using EntryT = typename IteratorBase::EntryT; public: - IteratorFull(const EntryT& root, const CONVERT& converter, FILTER filter) + IteratorFull(const EntryT& root, const CONVERT* converter, FILTER filter) : IteratorBase(converter, filter), stack_{}, stack_size_{0} { PrepareAndPush(root.GetNode()); FindNextElement(); diff --git a/phtree/v16/iterator_hc.h b/phtree/v16/iterator_hc.h index bf6cccda..441b9b71 100644 --- a/phtree/v16/iterator_hc.h +++ b/phtree/v16/iterator_hc.h @@ -53,7 +53,7 @@ class IteratorHC : public IteratorBase { const EntryT& root, const KeyInternal& range_min, const KeyInternal& range_max, - const CONVERT& converter, + const CONVERT* converter, FILTER filter) : IteratorBase(converter, filter) , stack_size_{0} diff --git a/phtree/v16/iterator_knn_hs.h b/phtree/v16/iterator_knn_hs.h index ddf5bfc1..2ed8c844 100644 --- a/phtree/v16/iterator_knn_hs.h +++ b/phtree/v16/iterator_knn_hs.h @@ -57,12 +57,12 @@ class IteratorKnnHS : public IteratorBase { const EntryT& root, size_t min_results, const KeyInternal& center, - const CONVERT& converter, + const CONVERT* converter, DISTANCE dist, FILTER filter) : IteratorBase(converter, filter) , center_{center} - , center_post_{converter.post(center)} + , center_post_{converter->post(center)} , current_distance_{std::numeric_limits::max()} , num_found_results_(0) , num_requested_results_(min_results) diff --git a/phtree/v16/iterator_simple.h b/phtree/v16/iterator_simple.h index 815979a7..703d56d6 100644 --- a/phtree/v16/iterator_simple.h +++ b/phtree/v16/iterator_simple.h @@ -29,7 +29,7 @@ class IteratorSimple : public IteratorBase { using EntryT = typename IteratorBase::EntryT; public: - explicit IteratorSimple(const CONVERT& converter) : IteratorBase(converter) { + explicit IteratorSimple(const CONVERT* converter) : IteratorBase(converter) { this->SetFinished(); } @@ -37,7 +37,7 @@ class IteratorSimple : public IteratorBase { const EntryT* current_result, const EntryT* current_node, const EntryT* parent_node, - CONVERT converter) + const CONVERT* converter) : IteratorBase(converter) { if (current_result) { this->SetCurrentResult(current_result); diff --git a/phtree/v16/phtree_v16.h b/phtree/v16/phtree_v16.h index fb666370..79c32749 100644 --- a/phtree/v16/phtree_v16.h +++ b/phtree/v16/phtree_v16.h @@ -68,11 +68,17 @@ class PhTreeV16 { std::is_arithmetic::value, "ScalarExternal must be an arithmetic type"); static_assert(DIM >= 1 && DIM <= 63, "This PH-Tree supports between 1 and 63 dimensions"); - PhTreeV16(CONVERT& converter = ConverterNoOp()) + PhTreeV16(CONVERT* converter) : num_entries_{0} , root_{MAX_BIT_WIDTH - 1} - , the_end_{converter} - , converter_{converter} {} + , converter_{converter} + , the_end_{converter} {} + + PhTreeV16(const PhTreeV16& other) = delete; + PhTreeV16& operator=(const PhTreeV16& other) = delete; + PhTreeV16(PhTreeV16&& other) noexcept = default; + PhTreeV16& operator=(PhTreeV16&& other) noexcept = default; + ~PhTreeV16() noexcept = default; /* * Attempts to build and insert a key and a value into the tree. @@ -395,8 +401,8 @@ class PhTreeV16 { // Contract: root_ contains a Node with 0 or more entries (the root node is the only Node // that is allowed to have less than two entries. EntryT root_; + CONVERT* converter_; IteratorEnd the_end_; - CONVERT converter_; }; } // namespace improbable::phtree::v16 From 1078e35a467c095650f8fc4880c903a37e17ff7f Mon Sep 17 00:00:00 2001 From: Tilmann Date: Thu, 7 Apr 2022 11:33:55 +0200 Subject: [PATCH 13/16] Fix/19 clean up iterators (#23) --- CHANGELOG.md | 3 +- phtree/phtree.h | 5 +- phtree/phtree_multimap.h | 127 +++++++----------- phtree/phtree_multimap_d_test.cc | 34 +++++ phtree/phtree_test.cc | 34 +++++ phtree/v16/BUILD | 2 +- phtree/v16/CMakeLists.txt | 2 +- phtree/v16/entry.h | 1 + phtree/v16/for_each.h | 2 +- phtree/v16/for_each_hc.h | 2 +- phtree/v16/iterator_base.h | 125 +++++++---------- phtree/v16/iterator_full.h | 20 +-- phtree/v16/iterator_hc.h | 27 ++-- phtree/v16/iterator_knn_hs.h | 12 +- ...erator_simple.h => iterator_with_parent.h} | 50 +++---- phtree/v16/phtree_v16.h | 115 ++++++++-------- 16 files changed, 286 insertions(+), 275 deletions(-) rename phtree/v16/{iterator_simple.h => iterator_with_parent.h} (58%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0245af7b..fe2fbdde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Changed -- Make PhTree and PhTreeMultimap moveable (move-assign/copy). [#18](https://github.com/tzaeschke/phtree-cpp/issues/18) +- Clean up iterator implementations. [#19](https://github.com/tzaeschke/phtree-cpp/issues/19) +- Make PhTree and PhTreeMultimap movable (move-assign/copy). [#18](https://github.com/tzaeschke/phtree-cpp/issues/18) - Potentially **BREAKING CHANGE** when using `IsNodeValid()` in provided filters: Changed `bit_width_t` from `uin16_t` to `uint32_t`. This improves performance of 3D insert/emplace on small datasets by up to 15%. To avoid warnings that meant that the API of `FilterAABB` and `FilterSphere` diff --git a/phtree/phtree.h b/phtree/phtree.h index 8250e96a..e54d174d 100644 --- a/phtree/phtree.h +++ b/phtree/phtree.h @@ -32,7 +32,6 @@ namespace improbable::phtree { template > class PhTree { friend PhTreeDebugHelper; - using QueryBox = typename CONVERTER::QueryBoxExternal; using Key = typename CONVERTER::KeyExternal; static constexpr dimension_t DimInternal = CONVERTER::DimInternal; @@ -41,6 +40,8 @@ class PhTree { typename std::conditional<(DIM == DimInternal), QueryPoint, QueryIntersect>::type; public: + using QueryBox = typename CONVERTER::QueryBoxExternal; + template explicit PhTree(CONVERTER2&& converter = CONVERTER()) : tree_{&converter_}, converter_{converter} {} @@ -256,7 +257,7 @@ class PhTree { /* * @return An iterator representing the tree's 'end'. */ - const auto& end() const { + auto end() const { return tree_.end(); } diff --git a/phtree/phtree_multimap.h b/phtree/phtree_multimap.h index af45bb8b..41efb771 100644 --- a/phtree/phtree_multimap.h +++ b/phtree/phtree_multimap.h @@ -56,8 +56,11 @@ class IteratorBase { friend PHTREE; using T = typename PHTREE::ValueType; + protected: + using BucketIterType = typename PHTREE::BucketIterType; + public: - explicit IteratorBase() noexcept : current_value_ptr_{nullptr}, is_finished_{false} {} + explicit IteratorBase() noexcept : current_value_ptr_{nullptr} {} T& operator*() const noexcept { assert(current_value_ptr_); @@ -71,26 +74,16 @@ class IteratorBase { friend bool operator==( const IteratorBase& left, const IteratorBase& right) noexcept { - // Note: The following compares pointers to Entry objects (actually: their values T) - // so it should be _fast_ and return 'true' only for identical entries. - static_assert(std::is_pointer_v); - return (left.is_finished_ && right.Finished()) || - (!left.is_finished_ && !right.Finished() && - left.current_value_ptr_ == right.current_value_ptr_); + return left.current_value_ptr_ == right.current_value_ptr_; } friend bool operator!=( const IteratorBase& left, const IteratorBase& right) noexcept { - return !(left == right); + return left.current_value_ptr_ != right.current_value_ptr_; } protected: - [[nodiscard]] bool Finished() const noexcept { - return is_finished_; - } - void SetFinished() noexcept { - is_finished_ = true; current_value_ptr_ = nullptr; } @@ -100,41 +93,23 @@ class IteratorBase { private: const T* current_value_ptr_; - bool is_finished_; }; -template +template class IteratorNormal : public IteratorBase { friend PHTREE; - using BucketIterType = typename PHTREE::BucketIterType; - using PhTreeIterEndType = typename PHTREE::EndType; + using BucketIterType = typename IteratorBase::BucketIterType; public: - explicit IteratorNormal(const PhTreeIterEndType& iter_ph_end) noexcept - : IteratorBase() - , iter_ph_end_{iter_ph_end} - , iter_ph_{iter_ph_end} - , iter_bucket_{} - , filter_{} { - this->SetFinished(); - } + explicit IteratorNormal() noexcept + : IteratorBase(), iter_ph_{}, iter_bucket_{}, filter_{} {} - // Why are we passing two iterators by reference + std::move? - // See: https://abseil.io/tips/117 - IteratorNormal( - const PhTreeIterEndType& iter_ph_end, - ITERATOR_PH iter_ph, - BucketIterType iter_bucket, - const FILTER filter = FILTER()) noexcept + template + IteratorNormal(ITER_PH&& iter_ph, BucketIterType&& iter_bucket, FILTER2&& filter) noexcept : IteratorBase() - , iter_ph_end_{iter_ph_end} - , iter_ph_{std::move(iter_ph)} - , iter_bucket_{std::move(iter_bucket)} - , filter_{filter} { - if (iter_ph == iter_ph_end) { - this->SetFinished(); - return; - } + , iter_ph_{std::forward(iter_ph)} + , iter_bucket_{std::forward(iter_bucket)} + , filter_{std::forward(filter)} { FindNextElement(); } @@ -168,7 +143,7 @@ class IteratorNormal : public IteratorBase { private: void FindNextElement() { - while (iter_ph_ != iter_ph_end_) { + while (!iter_ph_.IsEnd()) { while (iter_bucket_ != iter_ph_->end()) { // We filter only entries here, nodes are filtered elsewhere if (filter_.IsEntryValid(iter_ph_.GetCurrentResult()->GetKey(), *iter_bucket_)) { @@ -178,7 +153,7 @@ class IteratorNormal : public IteratorBase { ++iter_bucket_; } ++iter_ph_; - if (iter_ph_ != iter_ph_end_) { + if (!iter_ph_.IsEnd()) { iter_bucket_ = iter_ph_->begin(); } } @@ -186,7 +161,6 @@ class IteratorNormal : public IteratorBase { this->SetFinished(); } - PhTreeIterEndType& iter_ph_end_; ITERATOR_PH iter_ph_; BucketIterType iter_bucket_; FILTER filter_; @@ -194,16 +168,11 @@ class IteratorNormal : public IteratorBase { template class IteratorKnn : public IteratorNormal { - using BucketIterType = typename PHTREE::BucketIterType; - using PhTreeIterEndType = typename PHTREE::EndType; - public: - IteratorKnn( - const PhTreeIterEndType& iter_ph_end, - const ITERATOR_PH iter_ph, - BucketIterType iter_bucket, - const FILTER filter) noexcept - : IteratorNormal(iter_ph_end, iter_ph, iter_bucket, filter) {} + template + IteratorKnn(ITERATOR_PH iter_ph, BucketIterType&& iter_bucket, const FILTER filter) noexcept + : IteratorNormal( + std::forward(iter_ph), std::forward(iter_bucket), filter) {} [[nodiscard]] double distance() const noexcept { return this->GetIteratorOfPhTree().distance(); @@ -223,17 +192,19 @@ template < bool POINT_KEYS = true, typename DEFAULT_QUERY_TYPE = QueryPoint> class PhTreeMultiMap { - friend PhTreeDebugHelper; using KeyInternal = typename CONVERTER::KeyInternal; - using QueryBox = typename CONVERTER::QueryBoxExternal; using Key = typename CONVERTER::KeyExternal; static constexpr dimension_t DimInternal = CONVERTER::DimInternal; using PHTREE = PhTreeMultiMap; - - public: using ValueType = T; using BucketIterType = decltype(std::declval().begin()); - using EndType = decltype(std::declval>().end()); + using EndType = decltype(std::declval>().end()); + + friend PhTreeDebugHelper; + friend IteratorBase; + + public: + using QueryBox = typename CONVERTER::QueryBoxExternal; explicit PhTreeMultiMap(CONVERTER converter = CONVERTER()) : tree_{&converter_}, converter_{converter}, size_{0} {} @@ -241,15 +212,7 @@ class PhTreeMultiMap { PhTreeMultiMap(const PhTreeMultiMap& other) = delete; PhTreeMultiMap& operator=(const PhTreeMultiMap& other) = delete; PhTreeMultiMap(PhTreeMultiMap&& other) noexcept = default; - // PhTreeMultiMap& operator=(PhTreeMultiMap&& other) noexcept = default; - PhTreeMultiMap& operator=(PhTreeMultiMap&& other) noexcept { - tree_ = std::move(other.tree_); - converter_ = std::move(other.converter_); - //the_end_ = std::move(other.the_end_); // TODO THis works, but it is pretty dirty! - bucket_dummy_end_ = std::move(other.bucket_dummy_end_); - size_ = std::move(other.size_); - return *this; - } + PhTreeMultiMap& operator=(PhTreeMultiMap&& other) noexcept = default; ~PhTreeMultiMap() noexcept = default; /* @@ -357,7 +320,7 @@ class PhTreeMultiMap { auto find(const Key& key) const { auto outer_iter = tree_.find(converter_.pre(key)); if (outer_iter == tree_.end()) { - return CreateIterator(tree_.end(), bucket_dummy_end_); + return CreateIterator(outer_iter, BucketIterType{}); } auto bucket_iter = outer_iter.second().begin(); return CreateIterator(outer_iter, bucket_iter); @@ -374,7 +337,7 @@ class PhTreeMultiMap { auto find(const Key& key, const T& value) const { auto outer_iter = tree_.find(converter_.pre(key)); if (outer_iter == tree_.end()) { - return CreateIterator(tree_.end(), bucket_dummy_end_); + return CreateIterator(outer_iter, BucketIterType{}); } auto bucket_iter = outer_iter.second().find(value); return CreateIterator(outer_iter, bucket_iter); @@ -543,7 +506,7 @@ class PhTreeMultiMap { auto begin(FILTER filter = FILTER()) const { auto outer_iter = tree_.begin(WrapFilter(filter)); if (outer_iter == tree_.end()) { - return CreateIterator(outer_iter, bucket_dummy_end_, filter); + return CreateIterator(outer_iter, BucketIterType{}, filter); } auto bucket_iter = outer_iter.second().begin(); assert(bucket_iter != outer_iter.second().end()); @@ -568,7 +531,7 @@ class PhTreeMultiMap { auto outer_iter = tree_.begin_query(query_type(converter_.pre_query(query_box)), WrapFilter(filter)); if (outer_iter == tree_.end()) { - return CreateIterator(outer_iter, bucket_dummy_end_, filter); + return CreateIterator(outer_iter, BucketIterType{}, filter); } auto bucket_iter = outer_iter.second().begin(); assert(bucket_iter != outer_iter.second().end()); @@ -604,7 +567,7 @@ class PhTreeMultiMap { auto outer_iter = tree_.begin_knn_query( min_results, converter_.pre(center), distance_function, WrapFilter(filter)); if (outer_iter == tree_.end()) { - return CreateIteratorKnn(outer_iter, bucket_dummy_end_, filter); + return CreateIteratorKnn(outer_iter, BucketIterType{}, filter); } auto bucket_iter = outer_iter.second().begin(); assert(bucket_iter != outer_iter.second().end()); @@ -614,8 +577,8 @@ class PhTreeMultiMap { /* * @return An iterator representing the tree's 'end'. */ - const auto& end() const { - return the_end_; + auto end() const { + return IteratorNormal{}; } /* @@ -653,18 +616,22 @@ class PhTreeMultiMap { return tree_; } - template + template auto CreateIterator( - OUTER_ITER outer_iter, BucketIterType bucket_iter, FILTER filter = FILTER()) const { + OUTER_ITER outer_iter, INNER_ITER&& bucket_iter, FILTER&& filter = FILTER()) const { return IteratorNormal( - tree_.end(), std::move(outer_iter), std::move(bucket_iter), filter); + std::forward(outer_iter), + std::forward(bucket_iter), + std::forward(filter)); } - template + template auto CreateIteratorKnn( - OUTER_ITER outer_iter, BucketIterType bucket_iter, FILTER filter = FILTER()) const { + OUTER_ITER outer_iter, INNER_ITER&& bucket_iter, FILTER&& filter = FILTER()) const { return IteratorKnn( - tree_.end(), std::move(outer_iter), std::move(bucket_iter), filter); + std::forward(outer_iter), + std::forward(bucket_iter), + std::forward(filter)); } template @@ -709,8 +676,6 @@ class PhTreeMultiMap { v16::PhTreeV16 tree_; CONVERTER converter_; - IteratorNormal the_end_{tree_.end()}; - BucketIterType bucket_dummy_end_; size_t size_; }; diff --git a/phtree/phtree_multimap_d_test.cc b/phtree/phtree_multimap_d_test.cc index c39758fc..1d493446 100644 --- a/phtree/phtree_multimap_d_test.cc +++ b/phtree/phtree_multimap_d_test.cc @@ -490,6 +490,10 @@ TEST(PhTreeMMDTest, TestUpdateWithEmplaceHint) { ASSERT_EQ(N, tree.size()); tree.clear(); + + tree.emplace_hint(tree.end(), {11, 21, 31}, 421); + tree.emplace_hint(tree.begin(), {1, 2, 3}, 42); + ASSERT_EQ(2, tree.size()); } TEST(PhTreeMMDTest, TestUpdateWithRelocate) { @@ -1171,4 +1175,34 @@ TEST(PhTreeTest, TestMoveAssign) { tree = std::move(tree1); test_tree(tree); tree.~PhTreeMultiMap(); +} + +TEST(PhTreeTest, TestMovableIterators) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + ASSERT_TRUE(std::is_move_constructible_v); + // ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.begin(), tree.end()); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.find(p), tree.end()); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterAABB filter(p, p, tree.converter()); + ASSERT_TRUE(std::is_move_constructible_v); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v()))>); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v()))>); } \ No newline at end of file diff --git a/phtree/phtree_test.cc b/phtree/phtree_test.cc index 34fae600..7a7abfe7 100644 --- a/phtree/phtree_test.cc +++ b/phtree/phtree_test.cc @@ -559,6 +559,10 @@ TEST(PhTreeTest, TestUpdateWithEmplaceHint) { ASSERT_EQ(N, tree.size()); tree.clear(); + + tree.emplace_hint(tree.end(), {11, 21, 31}, 421); + tree.emplace_hint(tree.begin(), {1, 2, 3}, 42); + ASSERT_EQ(2, tree.size()); } TEST(PhTreeTest, TestEraseByIterator) { @@ -1187,4 +1191,34 @@ TEST(PhTreeTest, TestMoveAssignCustomConverter) { ASSERT_GE(tree.converter().count_pre_local, 2); ASSERT_EQ(tree.converter().count_pre_local, count_pre); tree.~PhTree(); +} + +TEST(PhTreeTest, TestMovableIterators) { + // Test edge case: only one entry in tree + PhPoint<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.begin(), tree.end()); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v); + ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_NE(tree.find(p), tree.end()); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterEvenId<3, Id> filter{}; + ASSERT_TRUE(std::is_move_constructible_v); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v); + + ASSERT_TRUE(std::is_move_constructible_v()))>); + // Not movable due to constant fields + // ASSERT_TRUE(std::is_move_assignable_v()))>); } \ No newline at end of file diff --git a/phtree/v16/BUILD b/phtree/v16/BUILD index b44b14a1..caf9f902 100644 --- a/phtree/v16/BUILD +++ b/phtree/v16/BUILD @@ -13,7 +13,7 @@ cc_library( "iterator_full.h", "iterator_hc.h", "iterator_knn_hs.h", - "iterator_simple.h", + "iterator_with_parent.h", "node.h", "phtree_v16.h", ], diff --git a/phtree/v16/CMakeLists.txt b/phtree/v16/CMakeLists.txt index 1aa65630..871de932 100644 --- a/phtree/v16/CMakeLists.txt +++ b/phtree/v16/CMakeLists.txt @@ -9,6 +9,6 @@ target_sources(phtree iterator_full.h iterator_hc.h iterator_knn_hs.h - iterator_simple.h + iterator_with_parent.h phtree_v16.h ) diff --git a/phtree/v16/entry.h b/phtree/v16/entry.h index ad6d3637..c9964f9e 100644 --- a/phtree/v16/entry.h +++ b/phtree/v16/entry.h @@ -51,6 +51,7 @@ class Entry { }; public: + using OrigValueT = T; /* * Construct entry with existing node. */ diff --git a/phtree/v16/for_each.h b/phtree/v16/for_each.h index 29706187..807c63ac 100644 --- a/phtree/v16/for_each.h +++ b/phtree/v16/for_each.h @@ -18,7 +18,7 @@ #define PHTREE_V16_FOR_EACH_H #include "../common/common.h" -#include "iterator_simple.h" +#include "iterator_with_parent.h" namespace improbable::phtree::v16 { diff --git a/phtree/v16/for_each_hc.h b/phtree/v16/for_each_hc.h index 01d6e89b..02ab93cb 100644 --- a/phtree/v16/for_each_hc.h +++ b/phtree/v16/for_each_hc.h @@ -18,7 +18,7 @@ #define PHTREE_V16_FOR_EACH_HC_H #include "../common/common.h" -#include "iterator_simple.h" +#include "iterator_with_parent.h" namespace improbable::phtree::v16 { diff --git a/phtree/v16/iterator_base.h b/phtree/v16/iterator_base.h index aaeb9101..b806a799 100644 --- a/phtree/v16/iterator_base.h +++ b/phtree/v16/iterator_base.h @@ -22,107 +22,90 @@ namespace improbable::phtree::v16 { -template -class PhTreeV16; - /* * Base class for all PH-Tree iterators. */ -template +template class IteratorBase { - protected: - static constexpr dimension_t DIM = CONVERT::DimInternal; - using KeyInternal = typename CONVERT::KeyInternal; - using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = Entry; - friend PhTreeV16; + using T = typename EntryT::OrigValueT; public: - explicit IteratorBase(const CONVERT* converter) - : current_result_{nullptr} - , current_node_{} - , parent_node_{} - , is_finished_{false} - , converter_{converter} - , filter_{FILTER()} {} - - explicit IteratorBase(const CONVERT* converter, FILTER filter) - : current_result_{nullptr} - , current_node_{} - , parent_node_{} - , is_finished_{false} - , converter_{converter} - , filter_(std::move(filter)) {} - - T& operator*() const { + explicit IteratorBase() noexcept : current_result_{nullptr} {} + explicit IteratorBase(const EntryT* current_result) noexcept + : current_result_{current_result} {} + + inline T& operator*() const noexcept { assert(current_result_); return current_result_->GetValue(); } - T* operator->() const { + inline T* operator->() const noexcept { assert(current_result_); return ¤t_result_->GetValue(); } - template - friend bool operator==( - const IteratorBase& left, - const IteratorBase& right) { - // Note: The following compares pointers to Entry objects so it should be - // a) fast (i.e. not comparing contents of entries) - // b) return `false` when comparing apparently identical entries from different PH-Trees (as - // intended) - return (left.is_finished_ && right.Finished()) || - (!left.is_finished_ && !right.Finished() && - left.current_result_ == right.GetCurrentResult()); + inline friend bool operator==( + const IteratorBase& left, const IteratorBase& right) noexcept { + return left.current_result_ == right.current_result_; } - template - friend bool operator!=( - const IteratorBase& left, - const IteratorBase& right) { - return !(left == right); - } - - auto first() const { - return converter_->post(current_result_->GetKey()); + inline friend bool operator!=( + const IteratorBase& left, const IteratorBase& right) noexcept { + return left.current_result_ != right.current_result_; } T& second() const { return current_result_->GetValue(); } - [[nodiscard]] bool Finished() const { - return is_finished_; + [[nodiscard]] inline bool IsEnd() const noexcept { + return current_result_ == nullptr; } - const EntryT* GetCurrentResult() const { + inline const EntryT* GetCurrentResult() const noexcept { return current_result_; } protected: void SetFinished() { - is_finished_ = true; current_result_ = nullptr; } - [[nodiscard]] bool ApplyFilter(const EntryT& entry) const { - return entry.IsNode() ? filter_.IsNodeValid(entry.GetKey(), entry.GetNodePostfixLen() + 1) - : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); - } - void SetCurrentResult(const EntryT* current_result) { current_result_ = current_result; } - void SetCurrentNodeEntry(const EntryT* current_node) { - assert(!current_node || current_node->IsNode()); - current_node_ = current_node; + protected: + const EntryT* current_result_; +}; + +template +using IteratorEnd = IteratorBase; + +template +class IteratorWithFilter +: public IteratorBase> { + protected: + static constexpr dimension_t DIM = CONVERT::DimInternal; + using KeyInternal = typename CONVERT::KeyInternal; + using SCALAR = typename CONVERT::ScalarInternal; + using EntryT = Entry; + + public: + explicit IteratorWithFilter(const CONVERT* converter, FILTER filter) noexcept + : IteratorBase(nullptr), converter_{converter}, filter_(std::forward(filter)) {} + + explicit IteratorWithFilter(const EntryT* current_result, const CONVERT* converter) noexcept + : IteratorBase(current_result), converter_{converter}, filter_{FILTER()} {} + + auto first() const { + return converter_->post(this->current_result_->GetKey()); } - void SetParentNodeEntry(const EntryT* parent_node) { - assert(!parent_node || parent_node->IsNode()); - parent_node_ = parent_node; + protected: + [[nodiscard]] bool ApplyFilter(const EntryT& entry) const { + return entry.IsNode() ? filter_.IsNodeValid(entry.GetKey(), entry.GetNodePostfixLen() + 1) + : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); } auto post(const KeyInternal& point) { @@ -130,22 +113,6 @@ class IteratorBase { } private: - /* - * The parent entry contains the parent node. The parent node is the node ABOVE the current node - * which contains the current entry. - */ - EntryT* GetCurrentNodeEntry() const { - return const_cast(current_node_); - } - - const EntryT* GetParentNodeEntry() const { - return parent_node_; - } - - const EntryT* current_result_; - const EntryT* current_node_; - const EntryT* parent_node_; - bool is_finished_; const CONVERT* converter_; FILTER filter_; }; diff --git a/phtree/v16/iterator_full.h b/phtree/v16/iterator_full.h index 6be55af9..7dbd401a 100644 --- a/phtree/v16/iterator_full.h +++ b/phtree/v16/iterator_full.h @@ -26,32 +26,32 @@ template class Node; template -class IteratorFull : public IteratorBase { +class IteratorFull : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using SCALAR = typename CONVERT::ScalarInternal; using NodeT = Node; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; public: IteratorFull(const EntryT& root, const CONVERT* converter, FILTER filter) - : IteratorBase(converter, filter), stack_{}, stack_size_{0} { + : IteratorWithFilter(converter, filter), stack_{}, stack_size_{0} { PrepareAndPush(root.GetNode()); FindNextElement(); } - IteratorFull& operator++() { + IteratorFull& operator++() noexcept { FindNextElement(); return *this; } - IteratorFull operator++(int) { + IteratorFull operator++(int) noexcept { IteratorFull iterator(*this); ++(*this); return iterator; } private: - void FindNextElement() { + void FindNextElement() noexcept { while (!IsEmpty()) { auto* p = &Peek(); while (*p != PeekEnd()) { @@ -82,22 +82,22 @@ class IteratorFull : public IteratorBase { return stack_[stack_size_ - 1].first; } - auto& Peek() { + auto& Peek() noexcept { assert(stack_size_ > 0); return stack_[stack_size_ - 1].first; } - auto& PeekEnd() { + auto& PeekEnd() noexcept { assert(stack_size_ > 0); return stack_[stack_size_ - 1].second; } - auto& Pop() { + auto& Pop() noexcept { assert(stack_size_ > 0); return stack_[--stack_size_].first; } - bool IsEmpty() { + bool IsEmpty() noexcept { return stack_size_ == 0; } diff --git a/phtree/v16/iterator_hc.h b/phtree/v16/iterator_hc.h index 441b9b71..c1467fd6 100644 --- a/phtree/v16/iterator_hc.h +++ b/phtree/v16/iterator_hc.h @@ -18,7 +18,7 @@ #define PHTREE_V16_ITERATOR_HC_H #include "../common/common.h" -#include "iterator_simple.h" +#include "iterator_with_parent.h" namespace improbable::phtree::v16 { @@ -42,11 +42,11 @@ class NodeIterator; * 2017. */ template -class IteratorHC : public IteratorBase { +class IteratorHC : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; public: IteratorHC( @@ -55,7 +55,7 @@ class IteratorHC : public IteratorBase { const KeyInternal& range_max, const CONVERT* converter, FILTER filter) - : IteratorBase(converter, filter) + : IteratorWithFilter(converter, filter) , stack_size_{0} , range_min_{range_min} , range_max_{range_max} { @@ -64,23 +64,22 @@ class IteratorHC : public IteratorBase { FindNextElement(); } - IteratorHC& operator++() { + IteratorHC& operator++() noexcept { FindNextElement(); return *this; } - IteratorHC operator++(int) { + IteratorHC operator++(int) noexcept { IteratorHC iterator(*this); ++(*this); return iterator; } private: - void FindNextElement() { - assert(!this->Finished()); + void FindNextElement() noexcept { while (!IsEmpty()) { auto* p = &Peek(); - const EntryT* current_result = nullptr; + const EntryT* current_result; while ((current_result = p->Increment(range_min_, range_max_))) { if (this->ApplyFilter(*current_result)) { if (current_result->IsNode()) { @@ -98,7 +97,7 @@ class IteratorHC : public IteratorBase { this->SetFinished(); } - auto& PrepareAndPush(const EntryT& entry) { + auto& PrepareAndPush(const EntryT& entry) noexcept { if (stack_.size() < stack_size_ + 1) { stack_.emplace_back(); } @@ -108,17 +107,17 @@ class IteratorHC : public IteratorBase { return ni; } - auto& Peek() { + auto& Peek() noexcept { assert(stack_size_ > 0); return stack_[stack_size_ - 1]; } - auto& Pop() { + auto& Pop() noexcept { assert(stack_size_ > 0); return stack_[--stack_size_]; } - bool IsEmpty() { + bool IsEmpty() noexcept { return stack_size_ == 0; } @@ -190,7 +189,7 @@ class NodeIterator { } private: - [[nodiscard]] bool IsPosValid(hc_pos_t key) const { + [[nodiscard]] inline bool IsPosValid(hc_pos_t key) const noexcept { return ((key | mask_lower_) & mask_upper_) == key; } diff --git a/phtree/v16/iterator_knn_hs.h b/phtree/v16/iterator_knn_hs.h index 2ed8c844..1ffc13d9 100644 --- a/phtree/v16/iterator_knn_hs.h +++ b/phtree/v16/iterator_knn_hs.h @@ -44,12 +44,12 @@ struct CompareEntryDistByDistance { } // namespace template -class IteratorKnnHS : public IteratorBase { +class IteratorKnnHS : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using KeyExternal = typename CONVERT::KeyExternal; using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; using EntryDistT = EntryDist; public: @@ -60,7 +60,7 @@ class IteratorKnnHS : public IteratorBase { const CONVERT* converter, DISTANCE dist, FILTER filter) - : IteratorBase(converter, filter) + : IteratorWithFilter(converter, filter) , center_{center} , center_post_{converter->post(center)} , current_distance_{std::numeric_limits::max()} @@ -81,12 +81,12 @@ class IteratorKnnHS : public IteratorBase { return current_distance_; } - IteratorKnnHS& operator++() { + IteratorKnnHS& operator++() noexcept { FindNextElement(); return *this; } - IteratorKnnHS operator++(int) { + IteratorKnnHS operator++(int) noexcept { IteratorKnnHS iterator(*this); ++(*this); return iterator; @@ -96,7 +96,7 @@ class IteratorKnnHS : public IteratorBase { void FindNextElement() { while (num_found_results_ < num_requested_results_ && !queue_.empty()) { auto& candidate = queue_.top(); - auto o = candidate.second; + auto* o = candidate.second; if (!o->IsNode()) { // data entry ++num_found_results_; diff --git a/phtree/v16/iterator_simple.h b/phtree/v16/iterator_with_parent.h similarity index 58% rename from phtree/v16/iterator_simple.h rename to phtree/v16/iterator_with_parent.h index 703d56d6..69dbfe6c 100644 --- a/phtree/v16/iterator_simple.h +++ b/phtree/v16/iterator_with_parent.h @@ -23,45 +23,49 @@ namespace improbable::phtree::v16 { template -class IteratorSimple : public IteratorBase { +class IteratorWithParent : public IteratorWithFilter { static constexpr dimension_t DIM = CONVERT::DimInternal; using SCALAR = typename CONVERT::ScalarInternal; - using EntryT = typename IteratorBase::EntryT; + using EntryT = typename IteratorWithFilter::EntryT; + friend PhTreeV16; public: - explicit IteratorSimple(const CONVERT* converter) : IteratorBase(converter) { - this->SetFinished(); - } - - explicit IteratorSimple( + explicit IteratorWithParent( const EntryT* current_result, const EntryT* current_node, const EntryT* parent_node, - const CONVERT* converter) - : IteratorBase(converter) { - if (current_result) { - this->SetCurrentResult(current_result); - this->SetCurrentNodeEntry(current_node); - this->SetParentNodeEntry(parent_node); - } else { - this->SetFinished(); - } - } + const CONVERT* converter) noexcept + : IteratorWithFilter(current_result, converter) + , current_node_{current_node} + , parent_node_{parent_node} {} - IteratorSimple& operator++() { + IteratorWithParent& operator++() { this->SetFinished(); return *this; } - IteratorSimple operator++(int) { - IteratorSimple iterator(*this); + IteratorWithParent operator++(int) { + IteratorWithParent iterator(*this); ++(*this); return iterator; } -}; -template -using IteratorEnd = IteratorSimple; + private: + /* + * The parent entry contains the parent node. The parent node is the node ABOVE the current node + * which contains the current entry. + */ + EntryT* GetCurrentNodeEntry() const { + return const_cast(current_node_); + } + + const EntryT* GetParentNodeEntry() const { + return parent_node_; + } + + const EntryT* current_node_; + const EntryT* parent_node_; +}; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/phtree_v16.h b/phtree/v16/phtree_v16.h index 79c32749..7bee3057 100644 --- a/phtree/v16/phtree_v16.h +++ b/phtree/v16/phtree_v16.h @@ -23,7 +23,7 @@ #include "iterator_full.h" #include "iterator_hc.h" #include "iterator_knn_hs.h" -#include "iterator_simple.h" +#include "iterator_with_parent.h" #include "node.h" namespace improbable::phtree::v16 { @@ -69,10 +69,7 @@ class PhTreeV16 { static_assert(DIM >= 1 && DIM <= 63, "This PH-Tree supports between 1 and 63 dimensions"); PhTreeV16(CONVERT* converter) - : num_entries_{0} - , root_{MAX_BIT_WIDTH - 1} - , converter_{converter} - , the_end_{converter} {} + : num_entries_{0}, root_{MAX_BIT_WIDTH - 1}, converter_{converter} {} PhTreeV16(const PhTreeV16& other) = delete; PhTreeV16& operator=(const PhTreeV16& other) = delete; @@ -123,37 +120,44 @@ class PhTreeV16 { */ template std::pair emplace_hint(const ITERATOR& iterator, const KeyT& key, Args&&... args) { - // This function can be used to insert a value close to a known value - // or close to a recently removed value. The hint can only be used if the new key is - // inside one of the nodes provided by the hint iterator. - // The idea behind using the 'parent' is twofold: - // - The 'parent' node is one level above the iterator position, it therefore is spatially - // larger and has a better probability of containing the new position, allowing for - // fast track emplace. - // - Using 'parent' allows a scenario where the iterator was previously used with - // erase(iterator). This is safe because erase() will never erase the 'parent' node. - - if (!iterator.GetParentNodeEntry()) { - // No hint available, use standard emplace() + if constexpr (!std::is_same_v>) { return emplace(key, std::forward(args)...); + } else { + // This function can be used to insert a value close to a known value + // or close to a recently removed value. The hint can only be used if the new key is + // inside one of the nodes provided by the hint iterator. + // The idea behind using the 'parent' is twofold: + // - The 'parent' node is one level above the iterator position, it is spatially + // larger and has a better probability of containing the new position, allowing for + // fast track emplace. + // - Using 'parent' allows a scenario where the iterator was previously used with + // erase(iterator). This is safe because erase() will never erase the 'parent' node. + + if (!iterator.GetParentNodeEntry()) { + // No hint available, use standard emplace() + return emplace(key, std::forward(args)...); + } + + auto* parent_entry = iterator.GetParentNodeEntry(); + if (NumberOfDivergingBits(key, parent_entry->GetKey()) > + parent_entry->GetNodePostfixLen() + 1) { + // replace higher up in the tree + return emplace(key, std::forward(args)...); + } + + // replace in node + auto* current_entry = parent_entry; + bool is_inserted = false; + while (current_entry->IsNode()) { + current_entry = ¤t_entry->GetNode().Emplace( + is_inserted, + key, + current_entry->GetNodePostfixLen(), + std::forward(args)...); + } + num_entries_ += is_inserted; + return {current_entry->GetValue(), is_inserted}; } - - auto* parent_entry = iterator.GetParentNodeEntry(); - if (NumberOfDivergingBits(key, parent_entry->GetKey()) > - parent_entry->GetNodePostfixLen() + 1) { - // replace higher up in the tree - return emplace(key, std::forward(args)...); - } - - // replace in node - auto* current_entry = parent_entry; - bool is_inserted = false; - while (current_entry->IsNode()) { - current_entry = ¤t_entry->GetNode().Emplace( - is_inserted, key, current_entry->GetNodePostfixLen(), std::forward(args)...); - } - num_entries_ += is_inserted; - return {current_entry->GetValue(), is_inserted}; } /* @@ -200,7 +204,7 @@ class PhTreeV16 { */ auto find(const KeyT& key) const { if (empty()) { - return IteratorSimple(converter_); + return IteratorWithParent(nullptr, nullptr, nullptr, converter_); } const EntryT* current_entry = &root_; @@ -212,7 +216,7 @@ class PhTreeV16 { current_entry = current_entry->GetNode().Find(key, current_entry->GetNodePostfixLen()); } - return IteratorSimple(current_entry, current_node, parent_node, converter_); + return IteratorWithParent(current_entry, current_node, parent_node, converter_); } /* @@ -251,25 +255,27 @@ class PhTreeV16 { */ template size_t erase(const ITERATOR& iterator) { - if (iterator.Finished()) { + if (iterator.IsEnd()) { return 0; } - if (!iterator.GetCurrentNodeEntry() || iterator.GetCurrentNodeEntry() == &root_) { - // There may be no entry because not every iterator sets it. - // Also, do _not_ use the root entry, see erase(key). - // Start searching from the top. - return erase(iterator.GetCurrentResult()->GetKey()); + if constexpr (std::is_same_v>) { + const auto& iter_rich = static_cast&>(iterator); + if (!iter_rich.GetCurrentNodeEntry() || iter_rich.GetCurrentNodeEntry() == &root_) { + // Do _not_ use the root entry, see erase(key). Start searching from the top. + return erase(iter_rich.GetCurrentResult()->GetKey()); + } + bool found = false; + assert(iter_rich.GetCurrentNodeEntry() && iter_rich.GetCurrentNodeEntry()->IsNode()); + iter_rich.GetCurrentNodeEntry()->GetNode().Erase( + iter_rich.GetCurrentResult()->GetKey(), + iter_rich.GetCurrentNodeEntry(), + iter_rich.GetCurrentNodeEntry()->GetNodePostfixLen(), + found); + num_entries_ -= found; + return found; } - bool found = false; - assert(iterator.GetCurrentNodeEntry() && iterator.GetCurrentNodeEntry()->IsNode()); - iterator.GetCurrentNodeEntry()->GetNode().Erase( - iterator.GetCurrentResult()->GetKey(), - iterator.GetCurrentNodeEntry(), - iterator.GetCurrentNodeEntry()->GetNodePostfixLen(), - found); - - num_entries_ -= found; - return found; + // There may be no entry because not every iterator sets it. + return erase(iterator.GetCurrentResult()->GetKey()); } /* @@ -362,8 +368,8 @@ class PhTreeV16 { /* * @return An iterator representing the tree's 'end'. */ - const auto& end() const { - return the_end_; + auto end() const { + return IteratorEnd(); } /* @@ -402,7 +408,6 @@ class PhTreeV16 { // that is allowed to have less than two entries. EntryT root_; CONVERT* converter_; - IteratorEnd the_end_; }; } // namespace improbable::phtree::v16 From 582fccf0364c78b74dd8bbe3636d84d3979470db Mon Sep 17 00:00:00 2001 From: Tilmann Date: Fri, 8 Apr 2022 14:25:27 +0200 Subject: [PATCH 14/16] Initial (#25) --- CHANGELOG.md | 4 + WORKSPACE | 6 +- phtree/BUILD | 13 + phtree/phtree.h | 35 +- phtree/phtree_d_test_filter.cc | 340 ++++++++++++++++++ phtree/phtree_multimap.h | 183 ++++++---- phtree/phtree_multimap_d_test.cc | 2 +- phtree/phtree_multimap_d_test_filter.cc | 439 ++++++++++++++++++++++++ phtree/phtree_test.cc | 2 - phtree/v16/for_each.h | 11 +- phtree/v16/for_each_hc.h | 15 +- phtree/v16/iterator_base.h | 11 +- phtree/v16/iterator_full.h | 7 +- phtree/v16/iterator_hc.h | 5 +- phtree/v16/iterator_knn_hs.h | 9 +- phtree/v16/phtree_v16.h | 52 ++- 16 files changed, 1000 insertions(+), 134 deletions(-) create mode 100644 phtree/phtree_multimap_d_test_filter.cc diff --git a/CHANGELOG.md b/CHANGELOG.md index fe2fbdde..87989b8b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Changed +- Potentially **BREAKING CHANGE**: Refactored API of all methods that accept callbacks and filters to + accept universal/forwarding references. + Also changed filters and callback to not require `const` methods. + [#22](https://github.com/tzaeschke/phtree-cpp/issues/22) - Clean up iterator implementations. [#19](https://github.com/tzaeschke/phtree-cpp/issues/19) - Make PhTree and PhTreeMultimap movable (move-assign/copy). [#18](https://github.com/tzaeschke/phtree-cpp/issues/18) - Potentially **BREAKING CHANGE** when using `IsNodeValid()` in provided filters: diff --git a/WORKSPACE b/WORKSPACE index e22c6961..59a65650 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -42,9 +42,9 @@ http_archive( http_archive( name = "gtest", build_file = "@third_party//gtest:BUILD", - sha256 = "9dc9157a9a1551ec7a7e43daea9a694a0bb5fb8bec81235d8a1e6ef64c716dcb", - strip_prefix = "googletest-release-1.10.0", - url = "https://github.com/google/googletest/archive/release-1.10.0.tar.gz", + sha256 = "b4870bf121ff7795ba20d20bcdd8627b8e088f2d1dab299a031c1034eddc93d5", + strip_prefix = "googletest-release-1.11.0", + url = "https://github.com/google/googletest/archive/release-1.11.0.tar.gz", ) # Development environment tooling diff --git a/phtree/BUILD b/phtree/BUILD index fe48ccc8..f0f75d93 100644 --- a/phtree/BUILD +++ b/phtree/BUILD @@ -108,6 +108,19 @@ cc_test( ], ) +cc_test( + name = "phtree_multimap_d_test_filter", + timeout = "long", + srcs = [ + "phtree_multimap_d_test_filter.cc", + ], + linkstatic = True, + deps = [ + ":phtree", + "//phtree/testing/gtest_main", + ], +) + cc_test( name = "phtree_d_test_custom_key", timeout = "long", diff --git a/phtree/phtree.h b/phtree/phtree.h index e54d174d..11087c73 100644 --- a/phtree/phtree.h +++ b/phtree/phtree.h @@ -166,9 +166,9 @@ class PhTree { * sub-nodes before they are returned or traversed. Any filter function must follow the * signature of the default 'FilterNoOp`. */ - template - void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { - tree_.for_each(callback, filter); + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) const { + tree_.for_each(std::forward(callback), std::forward(filter)); } /* @@ -183,15 +183,18 @@ class PhTree { * signature of the default 'FilterNoOp`. */ template < - typename CALLBACK_FN, + typename CALLBACK, typename FILTER = FilterNoOp, typename QUERY_TYPE = DEFAULT_QUERY_TYPE> void for_each( QueryBox query_box, - CALLBACK_FN& callback, - FILTER filter = FILTER(), + CALLBACK&& callback, + FILTER&& filter = FILTER(), QUERY_TYPE query_type = QUERY_TYPE()) const { - tree_.for_each(query_type(converter_.pre_query(query_box)), callback, filter); + tree_.for_each( + query_type(converter_.pre_query(query_box)), + std::forward(callback), + std::forward(filter)); } /* @@ -202,8 +205,8 @@ class PhTree { * @return an iterator over all (filtered) entries in the tree, */ template - auto begin(FILTER filter = FILTER()) const { - return tree_.begin(filter); + auto begin(FILTER&& filter = FILTER()) const { + return tree_.begin(std::forward(filter)); } /* @@ -219,9 +222,10 @@ class PhTree { template auto begin_query( const QueryBox& query_box, - FILTER filter = FILTER(), + FILTER&& filter = FILTER(), QUERY_TYPE query_type = DEFAULT_QUERY_TYPE()) const { - return tree_.begin_query(query_type(converter_.pre_query(query_box)), filter); + return tree_.begin_query( + query_type(converter_.pre_query(query_box)), std::forward(filter)); } /* @@ -246,12 +250,15 @@ class PhTree { auto begin_knn_query( size_t min_results, const Key& center, - DISTANCE distance_function = DISTANCE(), - FILTER filter = FILTER()) const { + DISTANCE&& distance_function = DISTANCE(), + FILTER&& filter = FILTER()) const { // We use pre() instead of pre_query() here because, strictly speaking, we want to // find the nearest neighbors of a (fictional) key, which may as well be a box. return tree_.begin_knn_query( - min_results, converter_.pre(center), distance_function, filter); + min_results, + converter_.pre(center), + std::forward(distance_function), + std::forward(filter)); } /* diff --git a/phtree/phtree_d_test_filter.cc b/phtree/phtree_d_test_filter.cc index f5470190..d7c68d75 100644 --- a/phtree/phtree_d_test_filter.cc +++ b/phtree/phtree_d_test_filter.cc @@ -40,6 +40,23 @@ class DoubleRng { std::uniform_real_distribution rnd; }; +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; + Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; + + int _i; +}; + template void generateCube(std::vector>& points, size_t N) { DoubleRng rng(-1000, 1000); @@ -68,3 +85,326 @@ void populate(TestTree& tree, std::vector>& points, } ASSERT_EQ(N, tree.size()); } + +static int f_default_construct_ = 0; +static int f_construct_ = 0; +static int f_copy_construct_ = 0; +static int f_move_construct_ = 0; +static int f_copy_assign_ = 0; +static int f_move_assign_ = 0; +static int f_destruct_ = 0; + +static void f_reset_id_counters() { + f_default_construct_ = 0; + f_construct_ = 0; + f_copy_construct_ = 0; + f_move_construct_ = 0; + f_copy_assign_ = 0; + f_move_assign_ = 0; + f_destruct_ = 0; +} + +template +struct FilterCount { + FilterCount() : last_known{} { + ++f_default_construct_; + } + + explicit FilterCount(const T i) : last_known{i} { + ++f_construct_; + } + + FilterCount(const FilterCount& other) { + ++f_copy_construct_; + last_known = other.last_known; + } + + FilterCount(FilterCount&& other) noexcept { + ++f_move_construct_; + last_known = other.last_known; + } + + FilterCount& operator=(const FilterCount& other) noexcept { + ++f_copy_assign_; + last_known = other.last_known; + return *this; + } + FilterCount& operator=(FilterCount&& other) noexcept { + ++f_move_assign_; + last_known = other.last_known; + return *this; + } + + ~FilterCount() { + ++f_destruct_; + } + + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const T& value) { + last_known = const_cast(value); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) { + return true; + } + + T last_known; +}; + +template +struct DistanceCount { + DistanceCount() { + ++f_default_construct_; + } + + DistanceCount(const DistanceCount&) { + ++f_copy_construct_; + } + + DistanceCount(DistanceCount&&) noexcept { + ++f_move_construct_; + } + + DistanceCount& operator=(const DistanceCount&) noexcept { + ++f_copy_assign_; + return *this; + } + DistanceCount& operator=(DistanceCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~DistanceCount() { + ++f_destruct_; + } + + double operator()(const PhPointD& p1, const PhPointD& p2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); + }; +}; + +static size_t static_id = 0; + +template +struct CallbackCount { + CallbackCount() { + static_id = 0; + ++f_default_construct_; + } + + CallbackCount(const CallbackCount&) { + ++f_copy_construct_; + } + + CallbackCount(CallbackCount&&) noexcept { + ++f_move_construct_; + } + + CallbackCount& operator=(const CallbackCount&) noexcept { + ++f_copy_assign_; + return *this; + } + CallbackCount& operator=(CallbackCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~CallbackCount() { + ++f_destruct_; + } + + void operator()(TestPoint, Id& t) { + static_id = t._i; + } +}; + +template +struct FilterConst { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const T& value) const { + assert(value._i == 1); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) const { + return true; + } +}; + +template +struct CallbackConst { + void operator()(const TestPoint, const Id& t) const { + static_id = t._i; + } +}; + +static void print_id_counters() { + std::cout << "dc=" << f_default_construct_ << " c=" << f_construct_ + << " cc=" << f_copy_construct_ << " mc=" << f_move_construct_ + << " ca=" << f_copy_assign_ << " ma=" << f_move_assign_ << " d=" << f_destruct_ + << std::endl; +} + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // rvalue + tree.for_each(callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // lvalue + tree.for_each(CallbackCount<3>(), FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + CallbackCount<3> callbackC; + FilterConst<3, Id> filterC; + treeC.for_each(callbackC, filterC); + // rvalue + treeC.for_each(CallbackConst<3>{}, FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // lvalue + tree.for_each(qb, callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + tree.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackCount<3>{}, FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + treeC.for_each(qb, callback, filterC); + // rvalue + treeC.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackConst<3>(), FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_BEGIN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin(filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin(FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin(filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin(FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin_query(qb, filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_query({{1, 2, 3}, {4, 5, 6}}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_query(qb, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_query(qb, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_KNN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + DistanceCount<3> dist_fn{}; + // lvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, dist_fn, filter)->_i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue #2 + auto a = tree.begin_knn_query, FilterCount<3, Id>>(3, {2, 3, 4})->_i; + ASSERT_EQ(a, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, dist_fn, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} \ No newline at end of file diff --git a/phtree/phtree_multimap.h b/phtree/phtree_multimap.h index 41efb771..98d027ed 100644 --- a/phtree/phtree_multimap.h +++ b/phtree/phtree_multimap.h @@ -95,21 +95,19 @@ class IteratorBase { const T* current_value_ptr_; }; -template +template class IteratorNormal : public IteratorBase { friend PHTREE; using BucketIterType = typename IteratorBase::BucketIterType; public: - explicit IteratorNormal() noexcept - : IteratorBase(), iter_ph_{}, iter_bucket_{}, filter_{} {} + explicit IteratorNormal() noexcept : IteratorBase(), iter_ph_{}, iter_bucket_{} {} - template - IteratorNormal(ITER_PH&& iter_ph, BucketIterType&& iter_bucket, FILTER2&& filter) noexcept + template + IteratorNormal(ITER_PH&& iter_ph, BucketIterType&& iter_bucket) noexcept : IteratorBase() , iter_ph_{std::forward(iter_ph)} - , iter_bucket_{std::forward(iter_bucket)} - , filter_{std::forward(filter)} { + , iter_bucket_{std::forward(iter_bucket)} { FindNextElement(); } @@ -146,7 +144,8 @@ class IteratorNormal : public IteratorBase { while (!iter_ph_.IsEnd()) { while (iter_bucket_ != iter_ph_->end()) { // We filter only entries here, nodes are filtered elsewhere - if (filter_.IsEntryValid(iter_ph_.GetCurrentResult()->GetKey(), *iter_bucket_)) { + if (iter_ph_.__Filter().IsEntryValid( + iter_ph_.GetCurrentResult()->GetKey(), *iter_bucket_)) { this->SetCurrentValue(&(*iter_bucket_)); return; } @@ -163,16 +162,15 @@ class IteratorNormal : public IteratorBase { ITERATOR_PH iter_ph_; BucketIterType iter_bucket_; - FILTER filter_; }; -template -class IteratorKnn : public IteratorNormal { +template +class IteratorKnn : public IteratorNormal { public: template - IteratorKnn(ITERATOR_PH iter_ph, BucketIterType&& iter_bucket, const FILTER filter) noexcept - : IteratorNormal( - std::forward(iter_ph), std::forward(iter_bucket), filter) {} + IteratorKnn(ITERATOR_PH iter_ph, BucketIterType&& iter_bucket) noexcept + : IteratorNormal( + std::forward(iter_ph), std::forward(iter_bucket)) {} [[nodiscard]] double distance() const noexcept { return this->GetIteratorOfPhTree().distance(); @@ -320,7 +318,7 @@ class PhTreeMultiMap { auto find(const Key& key) const { auto outer_iter = tree_.find(converter_.pre(key)); if (outer_iter == tree_.end()) { - return CreateIterator(outer_iter, BucketIterType{}); + return CreateIterator(outer_iter); } auto bucket_iter = outer_iter.second().begin(); return CreateIterator(outer_iter, bucket_iter); @@ -337,7 +335,7 @@ class PhTreeMultiMap { auto find(const Key& key, const T& value) const { auto outer_iter = tree_.find(converter_.pre(key)); if (outer_iter == tree_.end()) { - return CreateIterator(outer_iter, BucketIterType{}); + return CreateIterator(outer_iter); } auto bucket_iter = outer_iter.second().find(value); return CreateIterator(outer_iter, bucket_iter); @@ -452,7 +450,7 @@ class PhTreeMultiMap { /* * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * (=sub-trees) before returning / traversing them. By default, all entries are returned. Filter * functions must implement the same signature as the default 'FilterNoOp'. * * @param callback The callback function to be called for every entry that matches the filter. @@ -462,10 +460,12 @@ class PhTreeMultiMap { * follow the signature of the default 'FilterNoOp`. * The default 'FilterNoOp` filter matches all entries. */ - template - void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { - CallbackWrapper inner_callback{callback, filter, converter_}; - tree_.for_each(inner_callback, WrapFilter(filter)); + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) const { + tree_.for_each( + NoOpCallback(), + WrapCallbackFilter{ + std::forward(callback), std::forward(filter), converter_}); } /* @@ -482,35 +482,37 @@ class PhTreeMultiMap { * The default 'FilterNoOp` filter matches all entries. */ template < - typename CALLBACK_FN, + typename CALLBACK, typename FILTER = FilterNoOp, typename QUERY_TYPE = DEFAULT_QUERY_TYPE> void for_each( QueryBox query_box, - CALLBACK_FN& callback, - const FILTER& filter = FILTER(), + CALLBACK&& callback, + FILTER&& filter = FILTER(), QUERY_TYPE query_type = QUERY_TYPE()) const { - CallbackWrapper inner_callback{callback, filter, converter_}; tree_.for_each( - query_type(converter_.pre_query(query_box)), inner_callback, WrapFilter(filter)); + query_type(converter_.pre_query(query_box)), + NoOpCallback(), + WrapCallbackFilter( + std::forward(callback), std::forward(filter), converter_)); } /* * Iterates over all entries in the tree. The optional filter allows filtering entries and nodes - * (=sub-trees) before returning / traversing them. By default all entries are returned. Filter + * (=sub-trees) before returning / traversing them. By default, all entries are returned. Filter * functions must implement the same signature as the default 'FilterNoOp'. * * @return an iterator over all (filtered) entries in the tree, */ template - auto begin(FILTER filter = FILTER()) const { - auto outer_iter = tree_.begin(WrapFilter(filter)); + auto begin(FILTER&& filter = FILTER()) const { + auto outer_iter = tree_.begin(WrapFilter(std::forward(filter))); if (outer_iter == tree_.end()) { - return CreateIterator(outer_iter, BucketIterType{}, filter); + return CreateIterator(outer_iter); } auto bucket_iter = outer_iter.second().begin(); assert(bucket_iter != outer_iter.second().end()); - return CreateIterator(outer_iter, bucket_iter, filter); + return CreateIterator(outer_iter, bucket_iter); } /* @@ -526,16 +528,16 @@ class PhTreeMultiMap { template auto begin_query( const QueryBox& query_box, - FILTER filter = FILTER(), + FILTER&& filter = FILTER(), QUERY_TYPE query_type = QUERY_TYPE()) const { - auto outer_iter = - tree_.begin_query(query_type(converter_.pre_query(query_box)), WrapFilter(filter)); + auto outer_iter = tree_.begin_query( + query_type(converter_.pre_query(query_box)), WrapFilter(std::forward(filter))); if (outer_iter == tree_.end()) { - return CreateIterator(outer_iter, BucketIterType{}, filter); + return CreateIterator(outer_iter, BucketIterType{}); } auto bucket_iter = outer_iter.second().begin(); assert(bucket_iter != outer_iter.second().end()); - return CreateIterator(outer_iter, bucket_iter, filter); + return CreateIterator(outer_iter, bucket_iter); } /* @@ -560,25 +562,28 @@ class PhTreeMultiMap { auto begin_knn_query( size_t min_results, const Key& center, - DISTANCE distance_function = DISTANCE(), - FILTER filter = FILTER()) const { + DISTANCE&& distance_function = DISTANCE(), + FILTER&& filter = FILTER()) const { // We use pre() instead of pre_query() here because, strictly speaking, we want to // find the nearest neighbors of a (fictional) key, which may as well be a box. auto outer_iter = tree_.begin_knn_query( - min_results, converter_.pre(center), distance_function, WrapFilter(filter)); + min_results, + converter_.pre(center), + std::forward(distance_function), + WrapFilter(std::forward(filter))); if (outer_iter == tree_.end()) { - return CreateIteratorKnn(outer_iter, BucketIterType{}, filter); + return CreateIteratorKnn(outer_iter); } auto bucket_iter = outer_iter.second().begin(); assert(bucket_iter != outer_iter.second().end()); - return CreateIteratorKnn(outer_iter, bucket_iter, filter); + return CreateIteratorKnn(outer_iter, bucket_iter); } /* * @return An iterator representing the tree's 'end'. */ auto end() const { - return IteratorNormal{}; + return IteratorNormal{}; } /* @@ -616,64 +621,90 @@ class PhTreeMultiMap { return tree_; } - template - auto CreateIterator( - OUTER_ITER outer_iter, INNER_ITER&& bucket_iter, FILTER&& filter = FILTER()) const { - return IteratorNormal( - std::forward(outer_iter), - std::forward(bucket_iter), - std::forward(filter)); + template + auto CreateIterator(OUTER_ITER outer_iter, INNER_ITER&& bucket_iter = INNER_ITER{}) const { + return IteratorNormal( + std::forward(outer_iter), std::forward(bucket_iter)); } - template - auto CreateIteratorKnn( - OUTER_ITER outer_iter, INNER_ITER&& bucket_iter, FILTER&& filter = FILTER()) const { - return IteratorKnn( - std::forward(outer_iter), - std::forward(bucket_iter), - std::forward(filter)); + template + auto CreateIteratorKnn(OUTER_ITER outer_iter, INNER_ITER&& bucket_iter = INNER_ITER{}) const { + return IteratorKnn( + std::forward(outer_iter), std::forward(bucket_iter)); } + /* + * We have two iterators, one that traverses the PH-Tree and one that traverses the + * bucket. We need two IsEntryValid() for these two iterators. + * The IsEntryValid() for the PH-Tree iterator always returns true (we do not support + * checking buckets at the moment). + * The IsEntryValid() for the bucket iterator forwards the call to the user defined + * IsEntryValid() for every entry in the bucket. + */ template - static auto WrapFilter(FILTER filter) { - // We always have two iterators, one that traverses the PH-Tree and one that traverses the - // bucket. Using the FilterWrapper we create a new Filter for the PH-Tree iterator. This new - // filter checks only if nodes are valid. It cannot check whether buckets are valid. - // The original filter is then used when we iterate over the entries of a bucket. At this - // point, we do not need to check IsNodeValid anymore for each entry (see `IteratorNormal`). + static auto WrapFilter(FILTER&& filter) { struct FilterWrapper { - [[nodiscard]] constexpr bool IsEntryValid(const KeyInternal&, const BUCKET&) const { - // This filter is checked in the Iterator. + [[nodiscard]] constexpr bool IsEntryValid(const KeyInternal&, const BUCKET&) { + // This filter is used in the PH-Tree iterator. return true; } + [[nodiscard]] constexpr bool IsEntryValid(const KeyInternal& key, const T& value) { + // This filter is used in the PH-Tree multimap iterator (bucket iterator). + return filter_.IsEntryValid(key, value); + } [[nodiscard]] constexpr bool IsNodeValid( - const KeyInternal& prefix, int bits_to_ignore) const { + const KeyInternal& prefix, int bits_to_ignore) { return filter_.IsNodeValid(prefix, bits_to_ignore); } FILTER filter_; }; - return FilterWrapper{filter}; + return FilterWrapper{std::forward(filter)}; } - template - struct CallbackWrapper { - /* - * The CallbackWrapper ensures that we call the callback on each entry of the bucket. - * The vanilla PH-Tree call it only on the bucket itself. - */ - void operator()(const Key& key, const BUCKET& bucket) const { - auto internal_key = converter_.pre(key); + /* + * This wrapper wraps the Filter and Callback such that the callback is called for every + * bucket entry that matches the user defined IsEntryValid(). + */ + template + class WrapCallbackFilter { + public: + // We always have two iterators, one that traverses the PH-Tree and one that traverses the + // bucket. Using the FilterWrapper we create a new Filter for the PH-Tree iterator. This new + // filter checks only if nodes are valid. It cannot check whether buckets are valid. + // The original filter is then used when we iterate over the entries of a bucket. At this + // point, we do not need to check IsNodeValid anymore for each entry (see `IteratorNormal`). + template + WrapCallbackFilter(CB&& callback, F&& filter, const CONVERTER& converter) + : callback_{std::forward(callback)} + , filter_{std::forward(filter)} + , converter_{converter} {} + + [[nodiscard]] constexpr bool IsEntryValid( + const KeyInternal& internal_key, const BUCKET& bucket) { + auto key = converter_.post(internal_key); for (auto& entry : bucket) { if (filter_.IsEntryValid(internal_key, entry)) { callback_(key, entry); } } + // Return false. We already called the callback. + return false; + } + + [[nodiscard]] constexpr bool IsNodeValid(const KeyInternal& prefix, int bits_to_ignore) { + return filter_.IsNodeValid(prefix, bits_to_ignore); } - CALLBACK_FN& callback_; - const FILTER filter_; + + private: + CALLBACK callback_; + FILTER filter_; const CONVERTER& converter_; }; + struct NoOpCallback { + void operator()(const Key&, const BUCKET&) {} + }; + v16::PhTreeV16 tree_; CONVERTER converter_; size_t size_; diff --git a/phtree/phtree_multimap_d_test.cc b/phtree/phtree_multimap_d_test.cc index 1d493446..cdca9bde 100644 --- a/phtree/phtree_multimap_d_test.cc +++ b/phtree/phtree_multimap_d_test.cc @@ -1184,7 +1184,7 @@ TEST(PhTreeTest, TestMovableIterators) { tree.emplace(p, Id{1}); ASSERT_TRUE(std::is_move_constructible_v); - // ASSERT_TRUE(std::is_move_assignable_v); + ASSERT_TRUE(std::is_move_assignable_v); ASSERT_NE(tree.begin(), tree.end()); ASSERT_TRUE(std::is_move_constructible_v); diff --git a/phtree/phtree_multimap_d_test_filter.cc b/phtree/phtree_multimap_d_test_filter.cc new file mode 100644 index 00000000..cd1cff3c --- /dev/null +++ b/phtree/phtree_multimap_d_test_filter.cc @@ -0,0 +1,439 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/phtree_multimap.h" +#include +#include +#include + +using namespace improbable::phtree; + +// Number of entries that have the same coordinate +static const size_t NUM_DUPL = 4; +[[maybe_unused]] static const double WORLD_MIN = -1000; +[[maybe_unused]] static const double WORLD_MAX = 1000; + +template +using TestPoint = PhPointD; + +template +using TestTree = PhTreeMultiMap>; + +class DoubleRng { + public: + DoubleRng(double minIncl, double maxExcl) : eng(), rnd{minIncl, maxExcl} {} + + double next() { + return rnd(eng); + } + + private: + std::default_random_engine eng; + std::uniform_real_distribution rnd; +}; + +struct Id { + Id() = default; + + explicit Id(const int i) : _i(i){}; + + bool operator==(const Id& rhs) const { + return _i == rhs._i; + } + + Id(Id const& rhs) = default; + Id(Id&& rhs) = default; + Id& operator=(Id const& rhs) = default; + Id& operator=(Id&& rhs) = default; + + int _i; +}; + +namespace std { +template <> +struct hash { + size_t operator()(const Id& x) const { + return std::hash{}(x._i); + } +}; +}; // namespace std + +struct IdHash { + template + std::size_t operator()(std::pair const& v) const { + return std::hash()(v.size()); + } +}; + +template +void generateCube(std::vector>& points, size_t N) { + assert(N % NUM_DUPL == 0); + DoubleRng rng(WORLD_MIN, WORLD_MAX); + auto reference_set = std::unordered_map, size_t>(); + + points.reserve(N); + for (size_t i = 0; i < N / NUM_DUPL; i++) { + // create duplicates, ie. entries with the same coordinates. However, avoid unintentional + // duplicates. + TestPoint key{}; + for (dimension_t d = 0; d < DIM; ++d) { + key[d] = rng.next(); + } + if (reference_set.count(key) != 0) { + i--; + continue; + } + reference_set.emplace(key, i); + for (size_t dupl = 0; dupl < NUM_DUPL; dupl++) { + auto point = TestPoint(key); + points.push_back(point); + } + } + ASSERT_EQ(reference_set.size(), N / NUM_DUPL); + ASSERT_EQ(points.size(), N); +} + +template +void populate(TestTree& tree, std::vector>& points, size_t N) { + generateCube(points, N); + for (size_t i = 0; i < N; i++) { + ASSERT_TRUE(tree.insert(points[i], i).second); + } + ASSERT_EQ(N, tree.size()); +} + +static int f_default_construct_ = 0; +static int f_construct_ = 0; +static int f_copy_construct_ = 0; +static int f_move_construct_ = 0; +static int f_copy_assign_ = 0; +static int f_move_assign_ = 0; +static int f_destruct_ = 0; + +static void f_reset_id_counters() { + f_default_construct_ = 0; + f_construct_ = 0; + f_copy_construct_ = 0; + f_move_construct_ = 0; + f_copy_assign_ = 0; + f_move_assign_ = 0; + f_destruct_ = 0; +} + +template +struct FilterCount { + FilterCount() : last_known{} { + ++f_default_construct_; + } + + explicit FilterCount(const T i) : last_known{i} { + ++f_construct_; + } + + FilterCount(const FilterCount& other) { + ++f_copy_construct_; + last_known = other.last_known; + } + + FilterCount(FilterCount&& other) noexcept { + ++f_move_construct_; + last_known = other.last_known; + } + + FilterCount& operator=(const FilterCount& other) noexcept { + ++f_copy_assign_; + last_known = other.last_known; + return *this; + } + FilterCount& operator=(FilterCount&& other) noexcept { + ++f_move_assign_; + last_known = other.last_known; + return *this; + } + + ~FilterCount() { + ++f_destruct_; + } + + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const T& value) { + last_known = const_cast(value); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) { + return true; + } + + T last_known; +}; + +template +struct DistanceCount { + DistanceCount() { + ++f_default_construct_; + } + + DistanceCount(const DistanceCount&) { + ++f_copy_construct_; + } + + DistanceCount(DistanceCount&&) noexcept { + ++f_move_construct_; + } + + DistanceCount& operator=(const DistanceCount&) noexcept { + ++f_copy_assign_; + return *this; + } + DistanceCount& operator=(DistanceCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~DistanceCount() { + ++f_destruct_; + } + + double operator()(const PhPointD& p1, const PhPointD& p2) const { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); + }; +}; + +static size_t static_id = 0; + +template +struct CallbackCount { + CallbackCount() { + static_id = 0; + ++f_default_construct_; + } + + CallbackCount(const CallbackCount&) { + ++f_copy_construct_; + } + + CallbackCount(CallbackCount&&) noexcept { + ++f_move_construct_; + } + + CallbackCount& operator=(const CallbackCount&) noexcept { + ++f_copy_assign_; + return *this; + } + CallbackCount& operator=(CallbackCount&&) noexcept { + ++f_move_assign_; + return *this; + } + + ~CallbackCount() { + ++f_destruct_; + } + + void operator()(const TestPoint, const Id& t) { + static_id = t._i; + } +}; + +template +struct FilterConst { + [[nodiscard]] constexpr bool IsEntryValid(const PhPoint&, const T& value) const { + assert(value._i == 1); + return true; + } + [[nodiscard]] constexpr bool IsNodeValid(const PhPoint&, int) const { + return true; + } +}; + +template +struct CallbackConst { + void operator()(const TestPoint, const Id& t) const { + static_id = t._i; + } +}; + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << f_default_construct_ << " c=" << f_construct_ + << " cc=" << f_copy_construct_ << " mc=" << f_move_construct_ + << " ca=" << f_copy_assign_ << " ma=" << f_move_assign_ << " d=" << f_destruct_ + << std::endl; +} + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // rvalue + tree.for_each(callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // lvalue + tree.for_each(CallbackCount<3>(), FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + CallbackCount<3> callbackC; + FilterConst<3, Id> filterC; + treeC.for_each(callbackC, filterC); + // rvalue + treeC.for_each(CallbackConst<3>{}, FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_FOR_EACH_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + CallbackCount<3> callback; + FilterCount<3, Id> filter{}; + // lvalue + tree.for_each(qb, callback, filter); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + tree.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackCount<3>{}, FilterCount<3, Id>()); + ASSERT_EQ(static_id, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + treeC.for_each(qb, callback, filterC); + // rvalue + treeC.for_each({{1, 2, 3}, {4, 5, 6}}, CallbackConst<3>(), FilterConst<3, Id>()); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_BEGIN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin(filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin(FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin(filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin(FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_WQ) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + TestTree<3, Id>::QueryBox qb{{1, 2, 3}, {4, 5, 6}}; + FilterCount<3, Id> filter{}; + // lvalue + ASSERT_EQ(tree.begin_query(qb, filter)->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_query({{1, 2, 3}, {4, 5, 6}}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(1, f_construct_ + f_default_construct_); + ASSERT_LE(1, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_query(qb, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_query(qb, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} + +TEST(PhTreeTest, TestFilterAPI_KNN) { + // Test edge case: only one entry in tree + PhPointD<3> p{1, 2, 3}; + auto tree = TestTree<3, Id>(); + tree.emplace(p, Id{1}); + + FilterCount<3, Id> filter{}; + DistanceCount<3> dist_fn{}; + // lvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, dist_fn, filter)->_i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_EQ(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue + ASSERT_EQ(tree.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterCount<3, Id>())->_i, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // rvalue #2 + auto a = tree.begin_knn_query, FilterCount<3, Id>>(3, {2, 3, 4})->_i; + ASSERT_EQ(a, 1); + ASSERT_EQ(2, f_construct_ + f_default_construct_); + ASSERT_LE(0, f_copy_construct_ + f_move_construct_ + f_copy_assign_ + f_move_assign_); + f_reset_id_counters(); + + // const Tree: just test that it compiles + const TestTree<3, Id>& treeC = tree; + // lvalue + FilterConst<3, Id> filterC; + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, dist_fn, filterC)->_i, 1); + // rvalue + ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterConst<3, Id>())->_i, 1); + f_reset_id_counters(); +} \ No newline at end of file diff --git a/phtree/phtree_test.cc b/phtree/phtree_test.cc index 7a7abfe7..4c17befe 100644 --- a/phtree/phtree_test.cc +++ b/phtree/phtree_test.cc @@ -95,12 +95,10 @@ struct Id { } bool operator==(const Id& rhs) const { - ++copy_assign_count_; return _i == rhs._i; } bool operator==(Id&& rhs) const { - ++move_assign_count_; return _i == rhs._i; } diff --git a/phtree/v16/for_each.h b/phtree/v16/for_each.h index 807c63ac..7a97b537 100644 --- a/phtree/v16/for_each.h +++ b/phtree/v16/for_each.h @@ -26,7 +26,7 @@ namespace improbable::phtree::v16 { * Iterates over the whole tree. Entries and child nodes that are rejected by the Filter are not * traversed or returned. */ -template +template class ForEach { static constexpr dimension_t DIM = CONVERT::DimInternal; using KeyInternal = typename CONVERT::KeyInternal; @@ -34,8 +34,11 @@ class ForEach { using EntryT = Entry; public: - ForEach(const CONVERT* converter, CALLBACK_FN& callback, FILTER filter) - : converter_{converter}, callback_{callback}, filter_(std::move(filter)) {} + template + ForEach(const CONVERT* converter, CB&& callback, F&& filter) + : converter_{converter} + , callback_{std::forward(callback)} + , filter_(std::forward(filter)) {} void Traverse(const EntryT& entry) { assert(entry.IsNode()); @@ -59,7 +62,7 @@ class ForEach { } const CONVERT* converter_; - CALLBACK_FN& callback_; + CALLBACK callback_; FILTER filter_; }; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/for_each_hc.h b/phtree/v16/for_each_hc.h index 02ab93cb..203969a4 100644 --- a/phtree/v16/for_each_hc.h +++ b/phtree/v16/for_each_hc.h @@ -33,7 +33,7 @@ namespace improbable::phtree::v16 { * For details see "Efficient Z-Ordered Traversal of Hypercube Indexes" by T. Zäschke, M.C. Norrie, * 2017. */ -template +template class ForEachHC { static constexpr dimension_t DIM = CONVERT::DimInternal; using KeyInternal = typename CONVERT::KeyInternal; @@ -41,17 +41,18 @@ class ForEachHC { using EntryT = Entry; public: + template ForEachHC( const KeyInternal& range_min, const KeyInternal& range_max, const CONVERT* converter, - CALLBACK_FN& callback, - FILTER filter) + CB&& callback, + F&& filter) : range_min_{range_min} , range_max_{range_max} , converter_{converter} - , callback_{callback} - , filter_(std::move(filter)) {} + , callback_{std::forward(callback)} + , filter_(std::forward(filter)) {} void Traverse(const EntryT& entry) { assert(entry.IsNode()); @@ -84,7 +85,7 @@ class ForEachHC { } } - bool CheckNode(const EntryT& entry, bit_width_t parent_postfix_len) const { + bool CheckNode(const EntryT& entry, bit_width_t parent_postfix_len) { const KeyInternal& key = entry.GetKey(); // Check if the node overlaps with the query box. // An infix with len=0 implies that at least part of the child node overlaps with the query, @@ -169,7 +170,7 @@ class ForEachHC { const KeyInternal range_min_; const KeyInternal range_max_; const CONVERT* converter_; - CALLBACK_FN& callback_; + CALLBACK callback_; FILTER filter_; }; } // namespace improbable::phtree::v16 diff --git a/phtree/v16/iterator_base.h b/phtree/v16/iterator_base.h index b806a799..9409e58b 100644 --- a/phtree/v16/iterator_base.h +++ b/phtree/v16/iterator_base.h @@ -92,8 +92,9 @@ class IteratorWithFilter using EntryT = Entry; public: - explicit IteratorWithFilter(const CONVERT* converter, FILTER filter) noexcept - : IteratorBase(nullptr), converter_{converter}, filter_(std::forward(filter)) {} + template + explicit IteratorWithFilter(const CONVERT* converter, F&& filter) noexcept + : IteratorBase(nullptr), converter_{converter}, filter_(std::forward(filter)) {} explicit IteratorWithFilter(const EntryT* current_result, const CONVERT* converter) noexcept : IteratorBase(current_result), converter_{converter}, filter_{FILTER()} {} @@ -102,8 +103,12 @@ class IteratorWithFilter return converter_->post(this->current_result_->GetKey()); } + auto& __Filter() { + return filter_; + } + protected: - [[nodiscard]] bool ApplyFilter(const EntryT& entry) const { + [[nodiscard]] bool ApplyFilter(const EntryT& entry) { return entry.IsNode() ? filter_.IsNodeValid(entry.GetKey(), entry.GetNodePostfixLen() + 1) : filter_.IsEntryValid(entry.GetKey(), entry.GetValue()); } diff --git a/phtree/v16/iterator_full.h b/phtree/v16/iterator_full.h index 7dbd401a..37531a63 100644 --- a/phtree/v16/iterator_full.h +++ b/phtree/v16/iterator_full.h @@ -33,8 +33,11 @@ class IteratorFull : public IteratorWithFilter { using EntryT = typename IteratorWithFilter::EntryT; public: - IteratorFull(const EntryT& root, const CONVERT* converter, FILTER filter) - : IteratorWithFilter(converter, filter), stack_{}, stack_size_{0} { + template + IteratorFull(const EntryT& root, const CONVERT* converter, F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) + , stack_{} + , stack_size_{0} { PrepareAndPush(root.GetNode()); FindNextElement(); } diff --git a/phtree/v16/iterator_hc.h b/phtree/v16/iterator_hc.h index c1467fd6..ecedef7e 100644 --- a/phtree/v16/iterator_hc.h +++ b/phtree/v16/iterator_hc.h @@ -49,13 +49,14 @@ class IteratorHC : public IteratorWithFilter { using EntryT = typename IteratorWithFilter::EntryT; public: + template IteratorHC( const EntryT& root, const KeyInternal& range_min, const KeyInternal& range_max, const CONVERT* converter, - FILTER filter) - : IteratorWithFilter(converter, filter) + F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) , stack_size_{0} , range_min_{range_min} , range_max_{range_max} { diff --git a/phtree/v16/iterator_knn_hs.h b/phtree/v16/iterator_knn_hs.h index 1ffc13d9..ca8aac80 100644 --- a/phtree/v16/iterator_knn_hs.h +++ b/phtree/v16/iterator_knn_hs.h @@ -53,20 +53,21 @@ class IteratorKnnHS : public IteratorWithFilter { using EntryDistT = EntryDist; public: + template explicit IteratorKnnHS( const EntryT& root, size_t min_results, const KeyInternal& center, const CONVERT* converter, - DISTANCE dist, - FILTER filter) - : IteratorWithFilter(converter, filter) + DIST&& dist, + F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) , center_{center} , center_post_{converter->post(center)} , current_distance_{std::numeric_limits::max()} , num_found_results_(0) , num_requested_results_(min_results) - , distance_(std::move(dist)) { + , distance_(std::forward(dist)) { if (min_results <= 0 || root.GetNode().GetEntryCount() == 0) { this->SetFinished(); return; diff --git a/phtree/v16/phtree_v16.h b/phtree/v16/phtree_v16.h index 7bee3057..fb95ce0f 100644 --- a/phtree/v16/phtree_v16.h +++ b/phtree/v16/phtree_v16.h @@ -289,9 +289,18 @@ class PhTreeV16 { * sub-nodes before they are returned or traversed. Any filter function must follow the * signature of the default 'FilterNoOp`. */ - template - void for_each(CALLBACK_FN& callback, FILTER filter = FILTER()) const { - ForEach(converter_, callback, filter).Traverse(root_); + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) { + ForEach( + converter_, std::forward(callback), std::forward(filter)) + .Traverse(root_); + } + + template + void for_each(CALLBACK&& callback, FILTER&& filter = FILTER()) const { + ForEach( + converter_, std::forward(callback), std::forward(filter)) + .Traverse(root_); } /* @@ -304,13 +313,18 @@ class PhTreeV16 { * sub-nodes before they are returned or traversed. Any filter function must follow the * signature of the default 'FilterNoOp`. */ - template + template void for_each( - const PhBox& query_box, - CALLBACK_FN& callback, - FILTER filter = FILTER()) const { - ForEachHC( - query_box.min(), query_box.max(), converter_, callback, filter) + // TODO check copy elision + const PhBox query_box, + CALLBACK&& callback, + FILTER&& filter = FILTER()) const { + ForEachHC( + query_box.min(), + query_box.max(), + converter_, + std::forward(callback), + std::forward(filter)) .Traverse(root_); } @@ -322,8 +336,8 @@ class PhTreeV16 { * @return an iterator over all (filtered) entries in the tree, */ template - auto begin(FILTER filter = FILTER()) const { - return IteratorFull(root_, converter_, filter); + auto begin(FILTER&& filter = FILTER()) const { + return IteratorFull(root_, converter_, std::forward(filter)); } /* @@ -336,9 +350,10 @@ class PhTreeV16 { * @return Result iterator. */ template - auto begin_query(const PhBox& query_box, FILTER filter = FILTER()) const { + auto begin_query( + const PhBox& query_box, FILTER&& filter = FILTER()) const { return IteratorHC( - root_, query_box.min(), query_box.max(), converter_, filter); + root_, query_box.min(), query_box.max(), converter_, std::forward(filter)); } /* @@ -359,10 +374,15 @@ class PhTreeV16 { auto begin_knn_query( size_t min_results, const KeyT& center, - DISTANCE distance_function = DISTANCE(), - FILTER filter = FILTER()) const { + DISTANCE&& distance_function = DISTANCE(), + FILTER&& filter = FILTER()) const { return IteratorKnnHS( - root_, min_results, center, converter_, distance_function, filter); + root_, + min_results, + center, + converter_, + std::forward(distance_function), + std::forward(filter)); } /* From 899716d7346950822b2cfafe08439073b2ab4ed0 Mon Sep 17 00:00:00 2001 From: Tilmann Date: Fri, 8 Apr 2022 20:34:45 +0200 Subject: [PATCH 15/16] FilterSphere issue (#28) --- CHANGELOG.md | 1 + phtree/common/filter.h | 2 +- phtree/phtree_d_test_filter.cc | 73 +++++++++++++++++++++++++++++++++- 3 files changed, 74 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87989b8b..25487e89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Changed +- Bugfix: FilterSphere was not working correctly. [#27](https://github.com/tzaeschke/phtree-cpp/issues/27) - Potentially **BREAKING CHANGE**: Refactored API of all methods that accept callbacks and filters to accept universal/forwarding references. Also changed filters and callback to not require `const` methods. diff --git a/phtree/common/filter.h b/phtree/common/filter.h index 3a3e30f0..fd54d29c 100644 --- a/phtree/common/filter.h +++ b/phtree/common/filter.h @@ -213,7 +213,7 @@ class FilterSphere { private: const KeyExternal center_external_; - const KeyExternal center_internal_; + const KeyInternal center_internal_; const ScalarExternal radius_; const CONVERTER converter_; const DISTANCE distance_function_; diff --git a/phtree/phtree_d_test_filter.cc b/phtree/phtree_d_test_filter.cc index d7c68d75..cc24d96d 100644 --- a/phtree/phtree_d_test_filter.cc +++ b/phtree/phtree_d_test_filter.cc @@ -240,6 +240,7 @@ struct CallbackConst { } }; +[[maybe_unused]] static void print_id_counters() { std::cout << "dc=" << f_default_construct_ << " c=" << f_construct_ << " cc=" << f_copy_construct_ << " mc=" << f_move_construct_ @@ -407,4 +408,74 @@ TEST(PhTreeTest, TestFilterAPI_KNN) { // rvalue ASSERT_EQ(treeC.begin_knn_query(3, {2, 3, 4}, DistanceCount<3>{}, FilterConst<3, Id>())->_i, 1); f_reset_id_counters(); -} \ No newline at end of file +} + +template +double distance(const TestPoint& p1, const TestPoint& p2) { + double sum2 = 0; + for (dimension_t i = 0; i < DIM; ++i) { + double d2 = p1[i] - p2[i]; + sum2 += d2 * d2; + } + return sqrt(sum2); +}; + +template +void referenceSphereQuery( + std::vector>& points, + TestPoint& center, + double radius, + std::set& result) { + for (size_t i = 0; i < points.size(); i++) { + auto& p = points[i]; + if (distance(center, p) <= radius) { + result.insert(i); + } + } +} + +// We use 'int&' because gtest does not compile with assertions in non-void functions. +template +void testSphereQuery(TestPoint& center, double radius, size_t N, int& result) { + TestTree tree; + std::vector> points; + populate(tree, points, N); + + std::set referenceResult; + referenceSphereQuery(points, center, radius, referenceResult); + + result = 0; + auto filter = FilterSphere(center, radius, tree.converter()); + for (auto it = tree.begin(filter); it != tree.end(); it++) { + auto& x = *it; + ASSERT_GE(x, 0); + ASSERT_EQ(referenceResult.count(x), 1); + result++; + } + ASSERT_EQ(referenceResult.size(), result); +} + +TEST(PhTreeDTest, TestSphereQuery0) { + const dimension_t dim = 3; + TestPoint p{-10000, -10000, -10000}; + int n = 0; + testSphereQuery(p, 0.1, 100, n); + ASSERT_EQ(0, n); +} + +TEST(PhTreeDTest, TestSphereQueryMany) { + const dimension_t dim = 3; + TestPoint p{0, 0, 0}; + int n = 0; + testSphereQuery(p, 1000, 1000, n); + ASSERT_GT(n, 400); + ASSERT_LT(n, 800); +} + +TEST(PhTreeDTest, TestSphereQueryAll) { + const dimension_t dim = 3; + TestPoint p{0, 0, 0}; + int n = 0; + testSphereQuery(p, 10000, 1000, n); + ASSERT_EQ(1000, n); +} From 1d1ba76bc719a6ae36eacb6e7c34db7a42504ac5 Mon Sep 17 00:00:00 2001 From: Til Date: Thu, 14 Apr 2022 16:58:24 +0200 Subject: [PATCH 16/16] Initial --- CHANGELOG.md | 7 ++++++- CMakeLists.txt | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25487e89..16aada8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Changed +- Nothing yet + +## [1.2.0] - 2022-04-14 +### Changed - Bugfix: FilterSphere was not working correctly. [#27](https://github.com/tzaeschke/phtree-cpp/issues/27) - Potentially **BREAKING CHANGE**: Refactored API of all methods that accept callbacks and filters to accept universal/forwarding references. @@ -92,7 +96,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Nothing. -[Unreleased]: https://github.com/improbable-eng/phtree-cpp/compare/v1.1.1...HEAD +[Unreleased]: https://github.com/improbable-eng/phtree-cpp/compare/v1.2.0...HEAD +[1.2.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.2.0...v1.1.0 [1.1.1]: https://github.com/improbable-eng/phtree-cpp/compare/v1.1.0...v1.1.1 [1.1.0]: https://github.com/improbable-eng/phtree-cpp/compare/v1.0.0...v1.1.0 [1.0.1]: https://github.com/improbable-eng/phtree-cpp/compare/v1.0.0...v1.0.1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 18a5da8a..fa78f1ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.14) # set the project name -project(PH_Tree_Main VERSION 1.1.1 +project(PH_Tree_Main VERSION 1.2.0 DESCRIPTION "PH-Tree C++" LANGUAGES CXX)