diff --git a/CHANGELOG.md b/CHANGELOG.md index f0b91ada041..5467bca75ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ ### Internals * Fix several crashes when running the object store benchmarks ([#7403](https://github.com/realm/realm-core/pull/7403)). +* Remove SetElementEquals and SetElementLessThan, as Mixed now uses the same comparisons as Set did. ---------------------------------------------- diff --git a/src/realm/bplustree.hpp b/src/realm/bplustree.hpp index c1d284a9116..6db1ffafc34 100644 --- a/src/realm/bplustree.hpp +++ b/src/realm/bplustree.hpp @@ -207,6 +207,7 @@ class BPlusTreeBase { virtual void erase(size_t) = 0; virtual void clear() = 0; + virtual void swap(size_t, size_t) = 0; void create(); void destroy(); @@ -416,7 +417,7 @@ class BPlusTree : public BPlusTreeBase { m_root->bptree_access(n, func); } - void swap(size_t ndx1, size_t ndx2) + void swap(size_t ndx1, size_t ndx2) override { if constexpr (std::is_same_v || std::is_same_v) { struct SwapBuffer { diff --git a/src/realm/set.cpp b/src/realm/set.cpp index 2e822d1260a..7ef6b07d7b1 100644 --- a/src/realm/set.cpp +++ b/src/realm/set.cpp @@ -55,7 +55,7 @@ void SetBase::clear_repl(Replication* repl) const static std::vector convert_to_set(const CollectionBase& rhs) { std::vector mixed(rhs.begin(), rhs.end()); - std::sort(mixed.begin(), mixed.end(), SetElementLessThan()); + std::sort(mixed.begin(), mixed.end()); mixed.erase(std::unique(mixed.begin(), mixed.end()), mixed.end()); return mixed; } @@ -72,7 +72,7 @@ bool SetBase::is_subset_of(const CollectionBase& rhs) const template bool SetBase::is_subset_of(It1 first, It2 last) const { - return std::includes(first, last, begin(), end(), SetElementLessThan{}); + return std::includes(first, last, begin(), end()); } bool SetBase::is_strict_subset_of(const CollectionBase& rhs) const @@ -96,7 +96,7 @@ bool SetBase::is_superset_of(const CollectionBase& rhs) const template bool SetBase::is_superset_of(It1 first, It2 last) const { - return std::includes(begin(), end(), first, last, SetElementLessThan{}); + return std::includes(begin(), end(), first, last); } bool SetBase::is_strict_superset_of(const CollectionBase& rhs) const @@ -120,13 +120,12 @@ bool SetBase::intersects(const CollectionBase& rhs) const template bool SetBase::intersects(It1 first, It2 last) const { - SetElementLessThan less; auto it = begin(); while (it != end() && first != last) { - if (less(*it, *first)) { + if (*it < *first) { ++it; } - else if (less(*first, *it)) { + else if (*first < *it) { ++first; } else { @@ -161,7 +160,7 @@ template void SetBase::assign_union(It1 first, It2 last) { std::vector the_diff; - std::set_difference(first, last, begin(), end(), std::back_inserter(the_diff), SetElementLessThan{}); + std::set_difference(first, last, begin(), end(), std::back_inserter(the_diff)); // 'the_diff' now contains all the elements that are in foreign set, but not in 'this' // Now insert those elements for (auto&& value : the_diff) { @@ -185,7 +184,7 @@ template void SetBase::assign_intersection(It1 first, It2 last) { std::vector intersection; - std::set_intersection(first, last, begin(), end(), std::back_inserter(intersection), SetElementLessThan{}); + std::set_intersection(first, last, begin(), end(), std::back_inserter(intersection)); clear(); // Elements in intersection comes from foreign set, so ok to use here for (auto&& value : intersection) { @@ -210,7 +209,7 @@ template void SetBase::assign_difference(It1 first, It2 last) { std::vector intersection; - std::set_intersection(first, last, begin(), end(), std::back_inserter(intersection), SetElementLessThan{}); + std::set_intersection(first, last, begin(), end(), std::back_inserter(intersection)); // 'intersection' now contains all the elements that are in both foreign set and 'this'. // Remove those elements. The elements comes from the foreign set, so ok to refer to. for (auto&& value : intersection) { @@ -235,9 +234,9 @@ template void SetBase::assign_symmetric_difference(It1 first, It2 last) { std::vector difference; - std::set_difference(first, last, begin(), end(), std::back_inserter(difference), SetElementLessThan{}); + std::set_difference(first, last, begin(), end(), std::back_inserter(difference)); std::vector intersection; - std::set_intersection(first, last, begin(), end(), std::back_inserter(intersection), SetElementLessThan{}); + std::set_intersection(first, last, begin(), end(), std::back_inserter(intersection)); // Now remove the common elements and add the differences for (auto&& value : intersection) { erase_any(value); @@ -300,6 +299,31 @@ bool SetBase::do_init_from_parent(ref_type ref, bool allow_create) const return true; } +void SetBase::resort_range(size_t start, size_t end) +{ + if (end > size()) { + end = size(); + } + if (start >= end) { + return; + } + std::vector indices; + indices.resize(end - start); + std::iota(indices.begin(), indices.end(), 0); + std::sort(indices.begin(), indices.end(), [&](auto a, auto b) { + return get_any(a + start) < get_any(b + start); + }); + for (size_t i = 0; i < indices.size(); ++i) { + if (indices[i] != i) { + m_tree->swap(i + start, start + indices[i]); + auto it = std::find(indices.begin() + i, indices.end(), i); + REALM_ASSERT(it != indices.end()); + *it = indices[i]; + indices[i] = i; + } + } +} + /********************************* Set *********************************/ template <> @@ -445,32 +469,6 @@ void Set::migrate() } } -template -void Set::do_resort(size_t start, size_t end) -{ - if (end > size()) { - end = size(); - } - if (start >= end) { - return; - } - std::vector indices; - indices.resize(end - start); - std::iota(indices.begin(), indices.end(), 0); - std::sort(indices.begin(), indices.end(), [&](auto a, auto b) { - return get(a + start) < get(b + start); - }); - for (size_t i = 0; i < indices.size(); ++i) { - if (indices[i] != i) { - tree().swap(i + start, start + indices[i]); - auto it = std::find(indices.begin() + i, indices.end(), i); - REALM_ASSERT(it != indices.end()); - *it = indices[i]; - indices[i] = i; - } - } -} - template <> void Set::migration_resort() { @@ -479,21 +477,21 @@ void Set::migration_resort() auto last_binary = std::partition_point(first_string, end(), [](const Mixed& item) { return item.is_type(type_String, type_Binary); }); - do_resort(first_string.index(), last_binary.index()); + resort_range(first_string.index(), last_binary.index()); } template <> void Set::migration_resort() { // sort order of strings changed - do_resort(0, size()); + resort_range(0, size()); } template <> void Set::migration_resort() { // sort order of binaries changed - do_resort(0, size()); + resort_range(0, size()); } void LnkSet::remove_target_row(size_t link_ndx) @@ -526,7 +524,6 @@ void LnkSet::to_json(std::ostream& out, JSONOutputMode, util::FunctionRef& indices, bool ascending) { indices.resize(sz); @@ -537,48 +534,4 @@ void set_sorted_indices(size_t sz, std::vector& indices, bool ascending) std::iota(indices.rbegin(), indices.rend(), 0); } } - -template -static bool partition_points(const Set& set, std::vector& indices, Iterator& first_string, - Iterator& first_binary, Iterator& end) -{ - first_string = std::partition_point(indices.begin(), indices.end(), [&](size_t i) { - return set.get(i).is_type(type_Bool, type_Int, type_Float, type_Double, type_Decimal); - }); - if (first_string == indices.end() || !set.get(*first_string).is_type(type_String)) - return false; - first_binary = std::partition_point(first_string + 1, indices.end(), [&](size_t i) { - return set.get(i).is_type(type_String); - }); - if (first_binary == indices.end() || !set.get(*first_binary).is_type(type_Binary)) - return false; - end = std::partition_point(first_binary + 1, indices.end(), [&](size_t i) { - return set.get(i).is_type(type_Binary); - }); - return true; -} - -template <> -void Set::sort(std::vector& indices, bool ascending) const -{ - set_sorted_indices(size(), indices, true); - - // The on-disk order is bool -> numbers -> string -> binary -> others - // We want to merge the string and binary sections to match the sort order - // of other collections. To do this we find the three partition points - // where the first string occurs, the first binary occurs, and the first - // non-binary after binaries occurs. If there's no strings or binaries we - // don't have to do anything. If they're both non-empty, we perform an - // in-place merge on the strings and binaries. - std::vector::iterator first_string, first_binary, end; - if (partition_points(*this, indices, first_string, first_binary, end)) { - std::inplace_merge(first_string, first_binary, end, [&](auto a, auto b) { - return get(a) < get(b); - }); - } - if (!ascending) { - std::reverse(indices.begin(), indices.end()); - } -} - } // namespace realm diff --git a/src/realm/set.hpp b/src/realm/set.hpp index 7c6ce92e083..d9d989ea1ed 100644 --- a/src/realm/set.hpp +++ b/src/realm/set.hpp @@ -59,6 +59,8 @@ class SetBase : public CollectionBase { static std::vector convert_to_mixed_set(const CollectionBase& rhs); bool do_init_from_parent(ref_type ref, bool allow_create) const; + void resort_range(size_t from, size_t to); + REALM_COLD REALM_NORETURN void throw_invalid_null() { throw InvalidArgument(ErrorCodes::PropertyNotNullable, @@ -227,7 +229,6 @@ class Set final : public CollectionBaseImpl { void do_insert(size_t ndx, T value); void do_erase(size_t ndx); void do_clear(); - void do_resort(size_t from, size_t to); iterator find_impl(const T& value) const; }; @@ -412,91 +413,6 @@ void Set::migration_resort(); template <> void Set::migration_resort(); -/// Compare set elements. -/// -/// We cannot use `std::less<>` directly, because the ordering of set elements -/// impacts the file format. For primitive types this is trivial (and can indeed -/// be just `std::less<>`), but for example `Mixed` has specialized comparison -/// that defines equality of numeric types. -template -struct SetElementLessThan { - bool operator()(const T& a, const T& b) const noexcept - { - // CAUTION: This routine is technically part of the file format, because - // it determines the storage order of Set elements. - return a < b; - } -}; - -template -struct SetElementEquals { - bool operator()(const T& a, const T& b) const noexcept - { - // CAUTION: This routine is technically part of the file format, because - // it determines the storage order of Set elements. - return a == b; - } -}; - -template <> -struct SetElementLessThan { - bool operator()(const Mixed& a, const Mixed& b) const noexcept - { - // CAUTION: This routine is technically part of the file format, because - // it determines the storage order of Set elements. - - // These are the rules for comparison of Mixed types in a Set: - // - If both values are null they are equal - // - If only one value is null, that value is lesser than the other - // - All numeric types are compared as the corresponding real numbers - // would compare. So integer 3 equals double 3. - // - String and binary types are compared using lexicographical comparison. - // - All other types are compared using the comparison operators defined - // for the types. - // - If two values have different types, the rank of the types are compared. - // the rank is as follows: - // boolean - // numeric - // string - // binary - // Timestamp - // ObjectId - // UUID - // TypedLink - // Link - // - // The current Mixed::compare function implements these rules except when comparing - // string and binary. If that function is changed we should either implement the rules - // here or upgrade all Set columns. - if (a.is_type(type_String) && b.is_type(type_Binary)) { - return true; - } - if (a.is_type(type_Binary) && b.is_type(type_String)) { - return false; - } - return a.compare(b) < 0; - } -}; - -template <> -struct SetElementEquals { - bool operator()(const Mixed& a, const Mixed& b) const noexcept - { - // CAUTION: This routine is technically part of the file format, because - // it determines the storage order of Set elements. - - // See comments above - - if (a.is_type(type_String) && b.is_type(type_Binary)) { - return false; - } - if (a.is_type(type_Binary) && b.is_type(type_String)) { - return false; - } - return a.compare(b) == 0; - } -}; - inline SetBase::SetBase(const SetBase& other) : CollectionBase(other) { @@ -656,7 +572,7 @@ template size_t Set::find(T value) const { auto it = find_impl(value); - if (it != end() && SetElementEquals{}(*it, value)) { + if (it != end() && *it == value) { return it.index(); } return npos; @@ -686,7 +602,7 @@ REALM_NOINLINE auto Set::find_impl(const T& value) const -> iterator { auto b = this->begin(); auto e = this->end(); // Note: This ends up calling `update_if_needed()`. - return std::lower_bound(b, e, value, SetElementLessThan{}); + return std::lower_bound(b, e, value); } template @@ -698,7 +614,7 @@ std::pair Set::insert(T value) throw_invalid_null(); auto it = find_impl(value); - if (it != this->end() && SetElementEquals{}(*it, value)) { + if (it != this->end() && *it == value) { return {it.index(), false}; } @@ -735,7 +651,7 @@ std::pair Set::erase(T value) { auto it = find_impl(value); // Note: This ends up calling `update_if_needed()`. - if (it == end() || !SetElementEquals{}(*it, value)) { + if (it == end() || *it != value) { return {npos, false}; } @@ -844,9 +760,6 @@ inline void Set::sort(std::vector& indices, bool ascending) const set_sorted_indices(sz, indices, ascending); } -template <> -void Set::sort(std::vector& indices, bool ascending) const; - template void Set::distinct(std::vector& indices, util::Optional sort_order) const { diff --git a/test/util/compare_groups.cpp b/test/util/compare_groups.cpp index a4a44e5b2ab..a6e7613e50f 100644 --- a/test/util/compare_groups.cpp +++ b/test/util/compare_groups.cpp @@ -108,7 +108,7 @@ bool compare_arrays(T& a, T& b, Cmp equals = Cmp{}) template bool compare_set_values(const Set& a, const Set& b) { - return compare_arrays(a, b, SetElementEquals{}); + return compare_arrays(a, b); } bool compare_dictionaries(const Dictionary& a, const Dictionary& b)