Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge release 1.2.0 from fork #36

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix/issue 14 b tree (#15)
  • Loading branch information
tzaeschke authored Apr 1, 2022
commit 7a6b1d877e1faa38ea914c63e96eb484b9141ab9
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [Unreleased]
### Changed
- DIM>8 now uses custom b_plus_tree_map instead of std::map. This improves performance for all operations, e.g.
window queries on large datasets are up to 4x faster. Benchmark results can be found in the issue.
[#14](https://github.com/tzaeschke/phtree-cpp/issues/14)
- postfix/infix field moved from Node to Entry. This avoids indirections and improves performance of most
operations by 5-15%. [#11](https://github.com/tzaeschke/phtree-cpp/issues/11)
- Entries now use 'union' to store children. [#9](https://github.com/tzaeschke/phtree-cpp/issues/9)
Expand Down
1 change: 1 addition & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@
identification within third-party archives.

Copyright 2020 Improbable Worlds Limited
Copyright 2022 Tilmann Zäschke

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
76 changes: 76 additions & 0 deletions TODO.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
Fix const-ness
==============
- operator[] should have a const overload
- find() should have a non-const overload
- test:

// Draft test: exercises read-only access (size, find, operator[], iterators,
// queries) through a const reference to a tree holding a single entry.
TEST(PhTreeTest, SmokeTestConstTree) {
// Test edge case: only one entry in tree
PhPoint<3> p{1, 2, 3};
TestTree<3, Id> tree1;
tree1.emplace(p, Id{1});
// All further insertions reuse the key `p`; the assertions below expect the
// tree to still contain exactly one entry whose id is 1.
tree1.emplace(p, Id{2});
Id id3{3};
tree1.insert(p, id3);
Id id4{4};
tree1.insert(p, id4);
// From here on, reads go through the const view; mutations still use tree1.
const auto& tree = tree1;
ASSERT_EQ(tree.size(), 1);
ASSERT_EQ(tree.find(p).second()._i, 1);
ASSERT_EQ(tree[p]._i, 1);

// Window query over the degenerate box {p, p}: expects exactly one result.
auto q_window = tree.begin_query({p, p});
ASSERT_EQ(1, q_window->_i);
++q_window;
ASSERT_EQ(q_window, tree.end());

// Full iteration: expects exactly one result.
auto q_extent = tree.begin();
ASSERT_EQ(1, q_extent->_i);
++q_extent;
ASSERT_EQ(q_extent, tree.end());

// kNN query asking for up to 10 neighbours of p: expects exactly one result.
auto q_knn = tree.begin_knn_query(10, p, DistanceEuclidean<3>());
ASSERT_EQ(1, q_knn->_i);
++q_knn;
ASSERT_EQ(q_knn, tree.end());

// First erase removes the entry; the second erase finds nothing.
ASSERT_EQ(1, tree1.erase(p));
ASSERT_EQ(0, tree.size());
ASSERT_EQ(0, tree1.erase(p));
ASSERT_EQ(0, tree.size());
ASSERT_TRUE(tree.empty());
}


b_plus_tree_map - binary search
===============
Use custom binary search:

// Draft: finds the first entry in `data` whose `first` member is not less than `key`
// and returns the iterator produced by std::lower_bound.
// NOTE(review): presumably `data` is sorted by `first` - confirm against the caller.
// Open question from the author:
// return BptEntry* ?!?!?
template <typename E>
[[nodiscard]] auto lower_bound(key_t key, std::vector<E>& data) noexcept {
return std::lower_bound(data.begin(), data.end(), key, [](E& left, const key_t key) {
return left.first < key;
});
// Disabled alternative that would use the hand-written __lower_bound sketch instead:
// auto pos = __lower_bound(&*data_leaf_.begin(), &*data_leaf_.end(), key);
// return data_leaf_.begin() + pos;
}

// Draft of a hand-written binary search over the range [__first, __last).
// Returns the offset, relative to the original `__first`, of the first element
// whose `first` member is not less than `__val`; returns the range length if
// every element compares less.
// NOTE(review): the trailing `const` suggests this is meant to be a member function - confirm.
// NOTE(review): identifiers beginning with a double underscore are reserved for the
// implementation; rename them before turning this sketch into real code.
template <typename TT>
inline auto __lower_bound(const TT* __first, const TT* __last, key_t __val) const noexcept {
// Remember the start of the range so the result can be returned as an offset.
const TT* const_first = __first;
auto __len = __last - __first;

while (__len > 0) {
auto __half = __len >> 1;
const TT* __middle = __first + __half;
if (__middle->first < __val) {
// Middle element is too small: continue in the upper half, excluding it.
__first = __middle;
++__first;
__len = __len - __half - 1;
} else
// Middle element may be the answer: continue in the lower half.
__len = __half;
}
return __first - const_first;
}

6 changes: 3 additions & 3 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ http_archive(

http_archive(
name = "gbenchmark",
sha256 = "dccbdab796baa1043f04982147e67bb6e118fe610da2c65f88912d73987e700c",
strip_prefix = "benchmark-1.5.2",
url = "https://github.com/google/benchmark/archive/v1.5.2.tar.gz",
sha256 = "6132883bc8c9b0df5375b16ab520fac1a85dc9e4cf5be59480448ece74b278d4",
strip_prefix = "benchmark-1.6.1",
url = "https://github.com/google/benchmark/archive/v1.6.1.tar.gz",
)

http_archive(
Expand Down
60 changes: 60 additions & 0 deletions phtree/benchmark/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -304,3 +304,63 @@ cc_binary(
"@spdlog",
],
)

# Benchmark binary built from hd_insert_d_benchmark.cc; test-only and statically linked.
cc_binary(
name = "hd_insert_d_benchmark",
testonly = True,
srcs = [
"hd_insert_d_benchmark.cc",
],
linkstatic = True,
deps = [
"//phtree",
"//phtree/benchmark",
"@gbenchmark//:benchmark",
"@spdlog",
],
)

# Benchmark binary built from hd_erase_d_benchmark.cc; test-only and statically linked.
cc_binary(
name = "hd_erase_d_benchmark",
testonly = True,
srcs = [
"hd_erase_d_benchmark.cc",
],
linkstatic = True,
deps = [
"//phtree",
"//phtree/benchmark",
"@gbenchmark//:benchmark",
"@spdlog",
],
)

# Benchmark binary built from hd_query_d_benchmark.cc; test-only and statically linked.
cc_binary(
name = "hd_query_d_benchmark",
testonly = True,
srcs = [
"hd_query_d_benchmark.cc",
],
linkstatic = True,
deps = [
"//phtree",
"//phtree/benchmark",
"@gbenchmark//:benchmark",
"@spdlog",
],
)

# Benchmark binary built from hd_knn_d_benchmark.cc; test-only and statically linked.
cc_binary(
name = "hd_knn_d_benchmark",
testonly = True,
srcs = [
"hd_knn_d_benchmark.cc",
],
linkstatic = True,
deps = [
"//phtree",
"//phtree/benchmark",
"@gbenchmark//:benchmark",
"@spdlog",
],
)
2 changes: 1 addition & 1 deletion phtree/benchmark/benchmark_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ auto CreateDuplicates =
};
} // namespace

enum TestGenerator { CUBE, CLUSTER };
enum TestGenerator { CUBE = 4, CLUSTER = 7 };

template <dimension_t DIM>
auto CreatePointDataMinMax = [](auto& points,
Expand Down
145 changes: 145 additions & 0 deletions phtree/benchmark/hd_erase_d_benchmark.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
* Copyright 2020 Improbable Worlds Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "logging.h"
#include "phtree/benchmark/benchmark_util.h"
#include "phtree/phtree.h"
#include <benchmark/benchmark.h>
#include <memory>
#include <random>

using namespace improbable;
using namespace improbable::phtree;
using namespace improbable::phtree::phbenchmark;

namespace {

// Upper limit passed to CreatePointData() and to the uniform cube distribution.
constexpr int GLOBAL_MAX = 10000;

/*
 * Benchmark for removing entries.
 *
 * The benchmark arguments are read from the State: range(0) is the number of
 * entities, range(1) selects the TestGenerator used to create the points.
 * Point data is generated once at construction; every benchmark iteration then
 * fills a fresh PhTreeD and erases all points again.
 */
template <dimension_t DIM>
class IndexBenchmark {
  public:
    // `explicit`: a benchmark::State must never convert implicitly into a benchmark object.
    explicit IndexBenchmark(benchmark::State& state);

    // Runs the benchmark loop over `state`.
    void Benchmark(benchmark::State& state);

  private:
    // Generates the point data and registers the benchmark counters.
    void SetupWorld(benchmark::State& state);
    // Adds all generated points to `tree`.
    void Insert(benchmark::State& state, PhTreeD<DIM, int>& tree);
    // Erases all generated points from `tree` and updates the counters.
    void Remove(benchmark::State& state, PhTreeD<DIM, int>& tree);

    // Members are initialised in declaration order; keep the constructor's
    // initialiser list in sync when reordering.
    const TestGenerator data_type_;
    const int num_entities_;

    std::default_random_engine random_engine_;
    std::uniform_real_distribution<> cube_distribution_;
    std::vector<PhPointD<DIM>> points_;
};

// Reads the benchmark arguments from `state` (range(0): entity count,
// range(1): data generator), seeds the random engine with 1, sets up logging
// and generates the point data via SetupWorld().
template <dimension_t DIM>
IndexBenchmark<DIM>::IndexBenchmark(benchmark::State& state)
: data_type_{static_cast<TestGenerator>(state.range(1))}
, num_entities_(static_cast<int>(state.range(0)))
, random_engine_{1}
, cube_distribution_{0, GLOBAL_MAX}
, points_(state.range(0)) {
    logging::SetupDefaultLogging();
    SetupWorld(state);
}

// Benchmark loop: for every iteration a fresh tree is filled (untimed), all
// entries are erased (timed) and the tree is destroyed (untimed).
template <dimension_t DIM>
void IndexBenchmark<DIM>::Benchmark(benchmark::State& state) {
    for (auto _ : state) {
        // Building the tree is setup work and must not be measured.
        state.PauseTiming();
        // Owned by a unique_ptr so the tree is released even if Insert/Remove throws.
        auto tree = std::make_unique<PhTreeD<DIM, int>>();
        Insert(state, *tree);
        state.ResumeTiming();

        Remove(state, *tree);

        state.PauseTiming();
        // avoid measuring deallocation: destroy the tree while the timer is paused
        tree.reset();
        state.ResumeTiming();
    }
}

// Fills `points_` with generated point data and resets the two counters that
// Remove() accumulates into.
template <dimension_t DIM>
void IndexBenchmark<DIM>::SetupWorld(benchmark::State& state) {
    logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM);
    CreatePointData<DIM>(points_, data_type_, num_entities_, 0, GLOBAL_MAX);

    auto& counters = state.counters;
    counters["total_remove_count"] = benchmark::Counter(0);
    // kIsRate: reported as a rate rather than as a plain total.
    counters["remove_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate);

    logging::info("World setup complete.");
}

// Adds the first `num_entities_` generated points to `tree`, using each
// point's index as its value. The State argument is unused.
template <dimension_t DIM>
void IndexBenchmark<DIM>::Insert(benchmark::State&, PhTreeD<DIM, int>& tree) {
    int id = 0;
    while (id < num_entities_) {
        tree.emplace(points_[id], id);
        ++id;
    }
}

// Erases the first `num_entities_` generated points from `tree` and adds the
// summed return values of erase() to both remove counters.
template <dimension_t DIM>
void IndexBenchmark<DIM>::Remove(benchmark::State& state, PhTreeD<DIM, int>& tree) {
    int erased_total = 0;
    for (int idx = 0; idx < num_entities_; ++idx) {
        erased_total += tree.erase(points_[idx]);
    }

    auto& counters = state.counters;
    counters["total_remove_count"] += erased_total;
    counters["remove_rate"] += erased_total;
}

} // namespace

// Benchmark entry point for 6 dimensions; any captured arguments are ignored.
template <typename... Arguments>
void PhTree6D(benchmark::State& state, Arguments&&...) {
    IndexBenchmark<6>{state}.Benchmark(state);
}

// Benchmark entry point for 10 dimensions; any captured arguments are ignored.
template <typename... Arguments>
void PhTree10D(benchmark::State& state, Arguments&&...) {
    IndexBenchmark<10>{state}.Benchmark(state);
}

// Benchmark entry point for 20 dimensions; any captured arguments are ignored.
template <typename... Arguments>
void PhTree20D(benchmark::State& state, Arguments&&...) {
    IndexBenchmark<20>{state}.Benchmark(state);
}

// index type, scenario name, data_generator, num_entities
// Ranges: first pair is the entity count (read as state.range(0)), second pair
// is the data generator (read as state.range(1)). Each pair is {lo, hi}; the
// generator pair is written in ascending order (CUBE = 4 < CLUSTER = 7) because
// the benchmark library's range expansion expects hi >= lo. With
// RangeMultiplier(10) no power of ten lies between 4 and 7, so exactly the two
// generator values are produced.
BENCHMARK_CAPTURE(PhTree6D, ERASE, 0)
    ->RangeMultiplier(10)
    ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}})
    ->Unit(benchmark::kMillisecond);

BENCHMARK_CAPTURE(PhTree10D, ERASE, 0)
    ->RangeMultiplier(10)
    ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}})
    ->Unit(benchmark::kMillisecond);

BENCHMARK_CAPTURE(PhTree20D, ERASE, 0)
    ->RangeMultiplier(10)
    ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}})
    ->Unit(benchmark::kMillisecond);

BENCHMARK_MAIN();
Loading