Skip to content

Commit

Permalink
Attempt to kill peer connections on crash
Browse files Browse the repository at this point in the history
  • Loading branch information
tylerkaraszewski committed Feb 23, 2024
1 parent 26a6529 commit d8ba53d
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 2 deletions.
1 change: 1 addition & 0 deletions BedrockServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,7 @@ void BedrockServer::runCommand(unique_ptr<BedrockCommand>&& _command, bool isBlo
// signals, like SIGSEGV, this function will be called.
SSetSignalHandlerDieFunc([&](){
_clusterMessenger->runOnAll(_generateCrashMessage(command));
_syncNode->kill();
});

// If we dequeue a status or control command, handle it immediately.
Expand Down
11 changes: 9 additions & 2 deletions libstuff/SSignal.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "libstuff.h"

#include <sqlitecluster/SQLiteNode.h>
#include <execinfo.h>
#include <fcntl.h>
#include <signal.h>
Expand Down Expand Up @@ -202,12 +202,19 @@ void _SSignal_StackTrace(int signum, siginfo_t *info, void *ucontext) {
SWARN("Calling DIE function.");
SSignalHandlerDieFunc();
SSignalHandlerDieFunc = [](){};
SWARN("DIE function returned, aborting (if not done).");
SWARN("DIE function returned.");
if (SQLiteNode::KILLABLE_SQLITE_NODE) {
SWARN("Killing peer connections.");
SQLiteNode::KILLABLE_SQLITE_NODE->kill();
}
}

// If we weren't already in ABORT, we'll call that. The second call will skip the above callstack generation.
if (signum != SIGABRT) {
SWARN("Aborting.");
abort();
} else {
SWARN("Already in ABORT.");
}
} else {
SALERT("Non-signal thread got signal " << strsignal(signum) << "(" << signum << "), which wasn't expected");
Expand Down
10 changes: 10 additions & 0 deletions sqlitecluster/SQLiteNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@
#undef SLOGPREFIX
#define SLOGPREFIX "{" << _name << "/" << SQLiteNode::stateName(_state) << "} "

// Globally reachable node pointer for the signal handling code. Starts out null and is pointed at `this` by the
// SQLiteNode constructor, so it refers to the most recently constructed node.
SQLiteNode* SQLiteNode::KILLABLE_SQLITE_NODE{nullptr};

// Initializations for static vars.
const uint64_t SQLiteNode::RECV_TIMEOUT{STIME_US_PER_S * 30};

Expand Down Expand Up @@ -145,6 +147,7 @@ SQLiteNode::SQLiteNode(SQLiteServer& server, shared_ptr<SQLitePool> dbPool, cons
_stateTimeout(STimeNow() + firstTimeout),
_syncPeer(nullptr)
{
KILLABLE_SQLITE_NODE = this;
SASSERT(_originalPriority >= 0);
onPrepareHandlerEnabled = false;

Expand Down Expand Up @@ -2716,3 +2719,10 @@ SQLiteNodeState SQLiteNode::stateFromName(const string& name) {
return it->second;
}
}

void SQLiteNode::kill() {
for (SQLitePeer* peer : _peerList) {
SWARN("Killing peer: " << peer->name);
peer->reset();
}
}
7 changes: 7 additions & 0 deletions sqlitecluster/SQLiteNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ class SQLiteNode : public STCPManager {
NUM_CONSISTENCY_LEVELS
};

// This is a globally accessible pointer to some node instance. The intention here is to let signal handling code attempt to kill outstanding
// peer connections on this node before shutting down.
// The constructor assigns `this` to it, so it always refers to the most recently constructed node.
// NOTE(review): no code visible here clears the pointer when a node is destroyed -- confirm it cannot dangle.
static SQLiteNode* KILLABLE_SQLITE_NODE;

// Receive timeout for cluster messages.
static const uint64_t RECV_TIMEOUT;

Expand Down Expand Up @@ -152,6 +156,9 @@ class SQLiteNode : public STCPManager {
// Call this if you want to shut down the node.
void beginShutdown();

// Kill all peer connections on this node by logging and resetting every peer in the peer list.
// Called from the crash/signal handling path as a last-ditch cleanup before the process aborts.
// NOTE(review): presumably not async-signal-safe (it logs and resets peers) -- treat as best-effort only.
void kill();

// Handle any read/write events that occurred.
void postPoll(fd_map& fdm, uint64_t& nextActivity);

Expand Down

0 comments on commit d8ba53d

Please sign in to comment.