diff --git a/CHANGELOG.md b/CHANGELOG.md index b24470d267..0539d3fa2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # NEXT RELEASE ### Enhancements +* Improve sync bootstrap performance by reducing the number of table selections in the replication logs for embedded objects. ([#7945](https://github.com/realm/realm-core/issues/7945)) * (PR [#????](https://github.com/realm/realm-core/pull/????)) * Client reset cycle detection now checks if the previous recovery attempt was made by the same core version, and if not attempts recovery again ([PR #7944](https://github.com/realm/realm-core/pull/7944)). diff --git a/src/realm/replication.cpp b/src/realm/replication.cpp index 350eb61fd0..26826fb499 100644 --- a/src/realm/replication.cpp +++ b/src/realm/replication.cpp @@ -140,19 +140,20 @@ void Replication::erase_column(const Table* t, ColKey col_key) m_encoder.erase_column(col_key); // Throws } -void Replication::track_new_object(ObjKey key) +void Replication::track_new_object(const Table* table, ObjKey key) { - m_selected_obj = key; - m_selected_collection = CollectionId(); - m_newly_created_object = true; + if (table == m_selected_table) { + m_selected_obj = key; + m_selected_obj_is_newly_created = true; + } - auto table_index = m_selected_table->get_index_in_group(); + auto table_index = table->get_index_in_group(); if (table_index >= m_most_recently_created_object.size()) { if (table_index >= m_most_recently_created_object.capacity()) m_most_recently_created_object.reserve(table_index * 2); m_most_recently_created_object.resize(table_index + 1); } - m_most_recently_created_object[table_index] = m_selected_obj; + m_most_recently_created_object[table_index] = key; } void Replication::create_object(const Table* t, GlobalKey id) @@ -162,7 +163,7 @@ void Replication::create_object(const Table* t, GlobalKey id) } select_table(t); // Throws m_encoder.create_object(id.get_local_key(0)); // Throws - track_new_object(id.get_local_key(0)); // Throws + 
track_new_object(t, id.get_local_key(0)); // Throws } void Replication::create_object_with_primary_key(const Table* t, ObjKey key, Mixed pk) @@ -173,13 +174,12 @@ void Replication::create_object_with_primary_key(const Table* t, ObjKey key, Mix } select_table(t); // Throws m_encoder.create_object(key); // Throws - track_new_object(key); + track_new_object(t, key); } void Replication::create_linked_object(const Table* t, ObjKey key) { - select_table(t); // Throws - track_new_object(key); // Throws + track_new_object(t, key); // Throws // Does not need to encode anything as embedded tables can't be observed } @@ -207,30 +207,37 @@ void Replication::do_select_table(const Table* table) m_selected_table = table; m_selected_collection = CollectionId(); m_selected_obj = ObjKey(); + m_selected_obj_is_newly_created = false; } -void Replication::do_select_obj(ObjKey key) +bool Replication::check_for_newly_created_object(ObjKey key, const Table* table) { - m_selected_obj = key; - m_selected_collection = CollectionId(); - - auto table_index = m_selected_table->get_index_in_group(); + auto table_index = table->get_index_in_group(); if (table_index < m_most_recently_created_object.size()) { - m_newly_created_object = m_most_recently_created_object[table_index] == key; + return m_most_recently_created_object[table_index] == key; } - else { - m_newly_created_object = false; + return false; +} + +bool Replication::do_select_obj(ObjKey key, const Table* table) +{ + bool newly_created = check_for_newly_created_object(key, table); + if (!newly_created) { + select_table(table); + m_selected_obj = key; + m_selected_obj_is_newly_created = false; + m_selected_collection = CollectionId(); } if (auto logger = would_log(LogLevel::debug)) { - auto class_name = m_selected_table->get_class_name(); - if (m_selected_table->get_primary_key_column()) { - auto pk = m_selected_table->get_primary_key(key); + auto class_name = table->get_class_name(); + if (table->get_primary_key_column()) { + auto pk = 
table->get_primary_key(key); logger->log(LogCategory::object, LogLevel::debug, "Mutating object '%1' with primary key %2", class_name, pk); } - else if (m_selected_table->is_embedded()) { - auto obj = m_selected_table->get_object(key); + else if (table->is_embedded()) { + auto obj = table->get_object(key); logger->log(LogCategory::object, LogLevel::debug, "Mutating object '%1' with path '%2'", class_name, obj.get_id()); } @@ -238,26 +245,21 @@ void Replication::do_select_obj(ObjKey key) logger->log(LogCategory::object, LogLevel::debug, "Mutating anonymous object '%1'[%2]", class_name, key); } } + return newly_created; } void Replication::do_select_collection(const CollectionBase& coll) { - select_table(coll.get_table().unchecked_ptr()); - ColKey col_key = coll.get_col_key(); - ObjKey key = coll.get_owner_key(); - auto path = coll.get_stable_path(); - - if (select_obj(key)) { - m_encoder.select_collection(col_key, key, path); // Throws + if (select_obj(coll.get_owner_key(), coll.get_table().unchecked_ptr())) { + m_encoder.select_collection(coll.get_col_key(), coll.get_owner_key(), coll.get_stable_path()); // Throws + m_selected_collection = CollectionId(coll); } - m_selected_collection = CollectionId(coll.get_table()->get_key(), key, std::move(path)); } void Replication::do_set(const Table* t, ColKey col_key, ObjKey key, _impl::Instruction variant) { if (variant != _impl::Instruction::instr_SetDefault) { - select_table(t); // Throws - if (select_obj(key)) { + if (select_obj(key, t)) { // Throws m_encoder.modify_object(col_key, key); // Throws } } @@ -294,8 +296,7 @@ void Replication::set(const Table* t, ColKey col_key, ObjKey key, Mixed value, _ void Replication::nullify_link(const Table* t, ColKey col_key, ObjKey key) { - select_table(t); // Throws - if (select_obj(key)) { + if (select_obj(key, t)) { // Throws m_encoder.modify_object(col_key, key); // Throws } if (auto logger = would_log(LogLevel::trace)) { @@ -311,10 +312,10 @@ void Replication::add_int(const 
Table* t, ColKey col_key, ObjKey key, int_fast64 } } -Path Replication::get_prop_name(Path&& path) const +Path Replication::get_prop_name(ConstTableRef table, Path&& path) const { auto col_key = path[0].get_col_key(); - auto prop_name = m_selected_table->get_column_name(col_key); + auto prop_name = table->get_column_name(col_key); path[0] = PathElement(prop_name); return std::move(path); } @@ -328,14 +329,15 @@ void Replication::log_collection_operation(const char* operation, const Collecti auto path = collection.get_short_path(); auto col_key = path[0].get_col_key(); - auto prop_name = m_selected_table->get_column_name(col_key); + ConstTableRef table = collection.get_table(); + auto prop_name = table->get_column_name(col_key); path[0] = PathElement(prop_name); std::string position; if (!index.is_null()) { position = util::format(" at position %1", index); } if (Table::is_link_type(col_key.get_type()) && value.is_type(type_Link)) { - auto target_table = m_selected_table->get_opposite_table(col_key); + auto target_table = table->get_opposite_table(col_key); if (target_table->is_embedded()) { logger->log(LogCategory::object, LogLevel::trace, " %1 embedded object '%2' in %3%4 ", operation, target_table->get_class_name(), path, position); @@ -381,7 +383,7 @@ void Replication::list_erase(const CollectionBase& list, size_t link_ndx) } if (auto logger = would_log(LogLevel::trace)) { logger->log(LogCategory::object, LogLevel::trace, " Erase '%1' at position %2", - get_prop_name(list.get_short_path()), link_ndx); + get_prop_name(list.get_table(), list.get_short_path()), link_ndx); } } @@ -392,7 +394,7 @@ void Replication::list_move(const CollectionBase& list, size_t from_link_ndx, si } if (auto logger = would_log(LogLevel::trace)) { logger->log(LogCategory::object, LogLevel::trace, " Move %1 to %2 in '%3'", from_link_ndx, to_link_ndx, - get_prop_name(list.get_short_path())); + get_prop_name(list.get_table(), list.get_short_path())); } } @@ -417,7 +419,8 @@ void 
Replication::list_clear(const CollectionBase& list) m_encoder.collection_clear(list.size()); // Throws } if (auto logger = would_log(LogLevel::trace)) { - logger->log(LogCategory::object, LogLevel::trace, " Clear '%1'", get_prop_name(list.get_short_path())); + logger->log(LogCategory::object, LogLevel::trace, " Clear '%1'", + get_prop_name(list.get_table(), list.get_short_path())); } } @@ -428,7 +431,7 @@ void Replication::link_list_nullify(const Lst& list, size_t link_ndx) } if (auto logger = would_log(LogLevel::trace)) { logger->log(LogCategory::object, LogLevel::trace, " Nullify '%1' position %2", - m_selected_table->get_column_name(list.get_col_key()), link_ndx); + list.get_table()->get_column_name(list.get_col_key()), link_ndx); } } @@ -455,7 +458,7 @@ void Replication::dictionary_erase(const CollectionBase& dict, size_t ndx, Mixed } if (auto logger = would_log(LogLevel::trace)) { logger->log(LogCategory::object, LogLevel::trace, " Erase %1 from '%2'", key, - get_prop_name(dict.get_short_path())); + get_prop_name(dict.get_table(), dict.get_short_path())); } } @@ -465,6 +468,7 @@ void Replication::dictionary_clear(const CollectionBase& dict) m_encoder.collection_clear(dict.size()); } if (auto logger = would_log(LogLevel::trace)) { - logger->log(LogCategory::object, LogLevel::trace, " Clear '%1'", get_prop_name(dict.get_short_path())); + logger->log(LogCategory::object, LogLevel::trace, " Clear '%1'", + get_prop_name(dict.get_table(), dict.get_short_path())); } } diff --git a/src/realm/replication.hpp b/src/realm/replication.hpp index 5bae6e7c6e..41d6ff28dd 100644 --- a/src/realm/replication.hpp +++ b/src/realm/replication.hpp @@ -392,33 +392,34 @@ class Replication { util::Logger* m_logger = nullptr; const Table* m_selected_table = nullptr; ObjKey m_selected_obj; + bool m_selected_obj_is_newly_created = false; CollectionId m_selected_collection; // The ObjKey of the most recently created object for each table (indexed // by the Table's index in the group). 
Most insertion patterns will only // ever update the most recently created object, so this is almost as // effective as tracking all newly created objects but much cheaper. std::vector m_most_recently_created_object; - // When true, the currently selected object was created in this transaction - // and we don't need to emit instructions for mutations on it - bool m_newly_created_object = false; void unselect_all() noexcept; void select_table(const Table*); // unselects link list and obj - bool select_obj(ObjKey key); - bool select_collection(const CollectionBase&); + [[nodiscard]] bool select_obj(ObjKey key, const Table*); + [[nodiscard]] bool select_collection(const CollectionBase&); void do_select_table(const Table*); - void do_select_obj(ObjKey key); - void do_select_collection(const CollectionBase&); + [[nodiscard]] bool do_select_obj(ObjKey key, const Table*); + void do_select_collection(const CollectionBase& coll); + // Returns true when `key` is the most recently created object of `table`, + // in which case we don't need to emit instructions for mutations on it + bool check_for_newly_created_object(ObjKey key, const Table* table); // Mark this ObjKey as being a newly created object that should not emit // mutation instructions - void track_new_object(ObjKey); + void track_new_object(const Table*, ObjKey); void do_set(const Table*, ColKey col_key, ObjKey key, _impl::Instruction variant = _impl::instr_Set); void log_collection_operation(const char* operation, const CollectionBase& collection, Mixed value, Mixed index) const; - Path get_prop_name(Path&&) const; + Path get_prop_name(ConstTableRef, Path&&) const; size_t transact_log_size(); }; @@ -463,7 +464,7 @@ inline void Replication::unselect_all() noexcept { m_selected_table = nullptr; m_selected_collection = CollectionId(); - m_newly_created_object = false; + m_selected_obj_is_newly_created = false; } inline void Replication::select_table(const Table* table) @@ -474,18 +475,20 @@ inline void
Replication::select_table(const Table* table) inline bool Replication::select_collection(const CollectionBase& coll) { + bool newly_created_object = + check_for_newly_created_object(coll.get_owner_key(), coll.get_table().unchecked_ptr()); if (CollectionId(coll) != m_selected_collection) { do_select_collection(coll); // Throws } - return !m_newly_created_object; + return !newly_created_object; } -inline bool Replication::select_obj(ObjKey key) +inline bool Replication::select_obj(ObjKey key, const Table* table) { - if (key != m_selected_obj) { - do_select_obj(key); + if (key != m_selected_obj || table != m_selected_table) { + return !do_select_obj(key, table); } - return !m_newly_created_object; + return !m_selected_obj_is_newly_created; } inline void Replication::rename_class(TableKey table_key, StringData) diff --git a/test/test_replication.cpp b/test/test_replication.cpp index 5e0fe845db..2c9396af7b 100644 --- a/test/test_replication.cpp +++ b/test/test_replication.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "test.hpp" @@ -264,55 +265,204 @@ TEST(Replication_WriteWithoutHistory) } } -struct ObjectMutationObserver : _impl::NoOpTransactionLogParser { +struct Select { + TableKey table_key; +}; + +struct Create { + int64_t obj_key; +}; + +struct Mutate { + int64_t obj_key; + ColKey col_key; +}; + +struct Remove { + int64_t obj_key; +}; + +struct SelectColl { + int64_t obj_key; + ColKey col_key; +}; + +struct CollInsert { + size_t ndx; +}; + +struct CollSet { + size_t ndx; +}; + +using InstructionVariant = mpark::variant; + +std::ostream& print_instructions(std::ostream& os, const std::vector& ivs, + size_t first_difference) noexcept +{ + size_t ndx = 0; + for (auto& element : ivs) { + if (first_difference == ndx) { + os << "==> "; + } + util::format(os, "[%1]: ", ndx++); + auto print = overload{ + [&](Select st) { + util::format(os, "Select{%1}", st.table_key); + }, + [&](Create co) { + util::format(os, "CreateObject{%1}", 
co.obj_key); + }, + [&](Mutate mo) { + util::format(os, "Mutate{%1, %2}", mo.obj_key, mo.col_key); + }, + [&](Remove rm) { + util::format(os, "RemoveObject{%1}", rm.obj_key); + }, + [&](SelectColl sc) { + util::format(os, "SelectCollection{%1, %2}", sc.obj_key, sc.col_key); + }, + [&](CollInsert ci) { + util::format(os, "CollectionInsert{%1}", ci.ndx); + }, + [&](CollSet cs) { + util::format(os, "CollectionSet{%1}", cs.ndx); + }, + }; + mpark::visit(print, element); + os << '\n'; + } + return os; +} + +bool compare_instructions(const InstructionVariant& a, const InstructionVariant& b) +{ + bool equal = false; + auto comp = overload{ + [&](Select a_val) { + if (const Select* b_val = mpark::get_if