From 2a602d75941ae4d6048f32b7322ae82c61696e97 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 12 Mar 2024 12:24:54 -0700 Subject: [PATCH 1/5] Improve cluster forming speed --- sqlitecluster/SQLiteNode.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index f47e01384..ec1792284 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -98,8 +98,8 @@ const vector SQLiteNode::_initPeers(const string& peerListString) { SINFO("Adding peer #" << peerList.size() << ": " << name << " (" << host << "), " << SComposeJSONObject(params)); SQLitePeer* peer = new SQLitePeer(name, host, params, peerList.size() + 1); - // Wait up to 2s before trying the first time - peer->nextReconnect = STimeNow() + SRandom::rand64() % (STIME_US_PER_S * 2); + // Wait up to 1s before trying the first time + peer->nextReconnect = STimeNow() + SRandom::rand64() % STIME_US_PER_S; peerList.push_back(peer); } @@ -867,8 +867,7 @@ bool SQLiteNode::update() { // See if we're taking too long if (STimeNow() > _stateTimeout) { // Timed out - SHMMM("Timed out waiting for STANDUP approval; reconnect all and re-SEARCHING."); - _reconnectAll(); + SHMMM("Timed out waiting for STANDUP approval; re-SEARCHING."); _changeState(SQLiteNodeState::SEARCHING); return true; // Re-update } @@ -1791,6 +1790,16 @@ void SQLiteNode::_onConnect(SQLitePeer* peer) { login["Version"] = _version; login["Permafollower"] = _originalPriority ? "false" : "true"; _sendToPeer(peer, login); + + // If we're STANDINGUP when a peer connects, send them a STATE message so they know they need to APPROVE or DENY the standup. + // Otherwise we will wait for their response that's not coming, and can eventually time out the standup. 
+ if (_state == SQLiteNodeState::STANDINGUP) { + SData state("STATE"); + state["StateChangeCount"] = to_string(_stateChangeCount); + state["State"] = stateName(_state); + state["Priority"] = SToStr(_priority); + _sendToPeer(peer, state); + } } // -------------------------------------------------------------------------- From 9b3f2aa348a17ae7ecddefc8535c175fb9d72c69 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 12 Mar 2024 14:26:13 -0700 Subject: [PATCH 2/5] Fix test --- test/clustertest/tests/MassiveQueryTest.cpp | 22 +++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/test/clustertest/tests/MassiveQueryTest.cpp b/test/clustertest/tests/MassiveQueryTest.cpp index 934d31b77..611ceff42 100644 --- a/test/clustertest/tests/MassiveQueryTest.cpp +++ b/test/clustertest/tests/MassiveQueryTest.cpp @@ -14,13 +14,31 @@ struct MassiveQueryTest : tpunit::TestFixture { cmd["processTimeout"] = "290000"; cmd["writeConsistency"] = "ASYNC"; auto r1 = brtester.executeWaitMultipleData({cmd})[0]; - uint64_t commitCount = stoull(r1["CommitCount"]); + uint64_t commitCount = 0; + try { + commitCount = stoull(r1["CommitCount"]); + } catch (const invalid_argument& e) { + cout << "invalid_argument parsing commitCount from: " << r1["CommitCount"] << endl; + } catch (const out_of_range& e) { + cout << "out_of_range parsing commitCount from: " << r1["CommitCount"] << endl; + } uint64_t commitCount2 = 0; + // Make sure the commit count is actually set. 
+ ASSERT_TRUE(commitCount); + SData status("Status"); for (size_t i = 0; i < 500; i++) { auto r2 = tester.getTester(2).executeWaitMultipleData({status})[0]; - commitCount2 = stoull(SParseJSONObject(r2.content)["CommitCount"]); + try { + commitCount2 = stoull(r1["CommitCount"]); + } catch (const invalid_argument& e) { + cout << "invalid_argument parsing commitCount2 from: " << SParseJSONObject(r2.content)["CommitCount"] << endl; + cout << r2.content << endl; + } catch (const out_of_range& e) { + cout << "out_of_range parsing commitCount2 from: " << SParseJSONObject(r2.content)["CommitCount"] << endl; + cout << r2.content << endl; + } if (commitCount2 == commitCount) { break; } From 01e19b88b2a104327fb08c2683e3920c120af1e2 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 12 Mar 2024 14:31:26 -0700 Subject: [PATCH 3/5] Oops --- test/clustertest/tests/MassiveQueryTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/clustertest/tests/MassiveQueryTest.cpp b/test/clustertest/tests/MassiveQueryTest.cpp index 611ceff42..b112a9bcc 100644 --- a/test/clustertest/tests/MassiveQueryTest.cpp +++ b/test/clustertest/tests/MassiveQueryTest.cpp @@ -31,7 +31,7 @@ struct MassiveQueryTest : tpunit::TestFixture { for (size_t i = 0; i < 500; i++) { auto r2 = tester.getTester(2).executeWaitMultipleData({status})[0]; try { - commitCount2 = stoull(r1["CommitCount"]); + commitCount2 = stoull(r2["CommitCount"]); } catch (const invalid_argument& e) { cout << "invalid_argument parsing commitCount2 from: " << SParseJSONObject(r2.content)["CommitCount"] << endl; cout << r2.content << endl; From 1161f77295b713fec17bda108bfc6c8eb6a40831 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 12 Mar 2024 14:48:34 -0700 Subject: [PATCH 4/5] Update test --- test/clustertest/tests/MassiveQueryTest.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/test/clustertest/tests/MassiveQueryTest.cpp 
b/test/clustertest/tests/MassiveQueryTest.cpp index b112a9bcc..e18697e47 100644 --- a/test/clustertest/tests/MassiveQueryTest.cpp +++ b/test/clustertest/tests/MassiveQueryTest.cpp @@ -29,15 +29,17 @@ struct MassiveQueryTest : tpunit::TestFixture { SData status("Status"); for (size_t i = 0; i < 500; i++) { - auto r2 = tester.getTester(2).executeWaitMultipleData({status})[0]; + auto responseList = tester.getTester(2).executeWaitMultipleData({status}); + auto r2 = responseList[0]; + auto json = SParseJSONObject(r2.content); try { - commitCount2 = stoull(r2["CommitCount"]); + commitCount2 = stoull(json["CommitCount"]); } catch (const invalid_argument& e) { - cout << "invalid_argument parsing commitCount2 from: " << SParseJSONObject(r2.content)["CommitCount"] << endl; - cout << r2.content << endl; + cout << "invalid_argument parsing commitCount2." << endl; + cout << r2.serialize() << endl; } catch (const out_of_range& e) { - cout << "out_of_range parsing commitCount2 from: " << SParseJSONObject(r2.content)["CommitCount"] << endl; - cout << r2.content << endl; + cout << "out_of_range parsing commitCount2." 
<< endl; + cout << r2.serialize() << endl; } if (commitCount2 == commitCount) { break; From f41dfbc21c3927bb5eaf7683e42ed83e060161f8 Mon Sep 17 00:00:00 2001 From: Phillip Smith Date: Wed, 13 Mar 2024 15:23:41 +1100 Subject: [PATCH 5/5] Elevate "lost our leader" message from hmmm to warn --- sqlitecluster/SQLiteNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index ec1792284..040a98a60 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1818,7 +1818,7 @@ void SQLiteNode::_onDisconnect(SQLitePeer* peer) { if (peer == _leadPeer) { // We've lost our leader: make sure we aren't waiting for // transaction response and re-SEARCH - PHMMM("Lost our LEADER, re-SEARCHING."); + PWARN("Lost our LEADER, re-SEARCHING."); SASSERTWARN(_state == SQLiteNodeState::SUBSCRIBING || _state == SQLiteNodeState::FOLLOWING); { _leadPeer = nullptr;