From 6daf2534ffc7137e60f4a3bc762d086a677fb9a0 Mon Sep 17 00:00:00 2001 From: Daniel Silva Date: Tue, 4 Jun 2024 18:45:57 +0200 Subject: [PATCH] adding comment --- sqlitecluster/SQLiteNode.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 02a892e09..316e97f4c 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1716,7 +1716,10 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { uint64_t threadAttemptStartTimestamp = STimeNow(); thread(&SQLiteNode::_replicate, this, peer, message, _dbPool->getIndex(false), threadAttemptStartTimestamp).detach(); } catch (const system_error& e) { - + // If the server is struggling and falling behind on replication, we might have too many threads + // causing resource exhaustion. If that happens, all the transactions that are already threaded + // and waiting for the transaction that failed will be stuck in an infinite loop. To prevent that, + // we're cancelling all threads that would need to wait on the transaction that is failing. uint64_t cancelAfter = message.calcU64("NewCount") - 1; _localCommitNotifier.cancel(cancelAfter); _leaderCommitNotifier.cancel(cancelAfter);