From d78ba8999cd4c7fb35c9cce34b65578316d1f95b Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 3 Oct 2023 08:26:21 -0700 Subject: [PATCH 1/2] Shorten timeout when no sockets to poll --- BedrockServer.cpp | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 45f8cf5fe..9f9142770 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -820,21 +820,41 @@ void BedrockServer::runCommand(unique_ptr&& _command, bool isBlo networkLoopCount++; fd_map fdm; command->prePoll(fdm); - const uint64_t now = STimeNow(); - uint64_t nextActivity = 0; - S_poll(fdm, max(nextActivity, now) - now); - // Timeout is five minutes unless we're shutting down or standing down, in which case it's 5 seconds. - // Note that BedrockCommad::postPoll sets the timeout to the command's timeout if it's lower than this value anyway, - // So this only has an effect if it will be shorter than the command's timeout. - uint64_t maxWaitMs = 5 * 60 * 1'000; + // Determine how long we'll wait in `poll`. + uint64_t maxWaitUs = 0; + + // The default case is to wait until the command will time out. + uint64_t now = STimeNow(); + if (now < command->timeout()) { + maxWaitUs = command->timeout() - now; + } else { + // The command is already timed out. This will hit the check for core.isTimedOut(command) below. + break; + } + + // There are two other special cases for the time to wait. + if (fdm.empty()) { + // If there are no sockets to poll, wait 1 second. + // Why would there be no sockets? It's because Auth::Stripe, as a rate-limiting feature, attaches sockets to requests after their made. + // This means a request can sit around with no actual socket attached to it for some length of time until it's turn to talk to Stripe comes up. + // If that happens though, and we're sitting in `poll` when it becomes our turn, we will wait the full five minute timeout of the original `poll` + // call before we time out and try again wit the newly-attached socket. + // Setting this to one second lets us try again more frequently. This is probably not the ideal way to handle this, but it works for now. + maxWaitUs = 1'000'000; + } + + // Also, if we're shutting down or standing down, wait 1 second. This keeps the rest of the server from being blocked on commands that won't finish. auto _syncNodeCopy = atomic_load(&_syncNode); if (_shutdownState.load() != RUNNING || (_syncNodeCopy && _syncNodeCopy->getState() == SQLiteNodeState::STANDINGDOWN)) { - maxWaitMs = 5'000; + maxWaitUs = 1'000'000; } + // Ok, go ahead and `poll`. + S_poll(fdm, maxWaitUs); + auto start = STimeNow(); - command->postPoll(fdm, nextActivity, maxWaitMs); + command->postPoll(fdm, maxWaitUs, maxWaitUs / 1000); postPollCumulativeTime += (STimeNow() - start); } From 2502f9c87387474ac053081755fe678c09ff5f73 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 3 Oct 2023 11:30:59 -0700 Subject: [PATCH 2/2] There are too many types of timeout here --- BedrockServer.cpp | 10 ++++++++-- libstuff/SHTTPSManager.h | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 9f9142770..8f721e7e5 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -845,21 +845,27 @@ void BedrockServer::runCommand(unique_ptr&& _command, bool isBlo } // Also, if we're shutting down or standing down, wait 1 second. This keeps the rest of the server from being blocked on commands that won't finish. + bool shuttingDown = false; auto _syncNodeCopy = atomic_load(&_syncNode); if (_shutdownState.load() != RUNNING || (_syncNodeCopy && _syncNodeCopy->getState() == SQLiteNodeState::STANDINGDOWN)) { maxWaitUs = 1'000'000; + shuttingDown = true; } // Ok, go ahead and `poll`. S_poll(fdm, maxWaitUs); + + // The 3rd parameter to `postPoll` here is the total allowed idle time on this connection. We will kill connections that do nothing at all after 5 minutes normally, + // or after only 5 seconds when we're shutting down so that we can clean up and move along. + uint64_t ignore{0}; auto start = STimeNow(); - command->postPoll(fdm, maxWaitUs, maxWaitUs / 1000); + command->postPoll(fdm, ignore, shuttingDown ? 5'000 : 300'000); postPollCumulativeTime += (STimeNow() - start); } if (networkLoopCount) { - SINFO("Completed HTTPS request in " << networkLoopCount << " loops with " << postPollCumulativeTime << " total time in postPoll"); + SINFO("Completed HTTPS request in " << networkLoopCount << " loops with " << postPollCumulativeTime << "us total time in postPoll"); } // Get a DB handle to work on. This will automatically be returned when dbScope goes out of scope. diff --git a/libstuff/SHTTPSManager.h b/libstuff/SHTTPSManager.h index 6f032e647..9d2a1d1a3 100644 --- a/libstuff/SHTTPSManager.h +++ b/libstuff/SHTTPSManager.h @@ -33,6 +33,8 @@ class SStandaloneHTTPSManager : public STCPManager { void prePoll(fd_map& fdm, Transaction& transaction); // Default timeout for HTTPS requests is 5 minutes.This can be changed on any call to postPoll. + // This is a total amount of milliseconds of idle activity since the last send on a socket before killing it. + // The purpose of this is to be able to shut down when no activity is happening. void postPoll(fd_map& fdm, Transaction& transaction, uint64_t& nextActivity, uint64_t timeoutMS = (5 * 60 * 1000)); // Close a transaction and remove it from our internal lists.