From 00d70660bee347e03713a07e8426fe590c1a3616 Mon Sep 17 00:00:00 2001 From: Roman Gershman Date: Wed, 22 Jan 2025 13:29:09 +0200 Subject: [PATCH] chore: improve test_timeout robustness 1. use assert_eventually 2. add more logs 3. unrelated - add a stats event to track timeout shutdowns. --- src/server/server_family.cc | 1 + src/server/server_state.cc | 5 +++-- src/server/server_state.h | 1 + tests/dragonfly/connection_test.py | 13 +++++++++++-- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/server/server_family.cc b/src/server/server_family.cc index 8d78de1b212b..00095e10db98 100644 --- a/src/server/server_family.cc +++ b/src/server/server_family.cc @@ -2290,6 +2290,7 @@ void ServerFamily::Info(CmdArgList args, const CommandContext& cmd_cntx) { append("pipeline_queue_length", m.facade_stats.conn_stats.dispatch_queue_entries); append("send_delay_ms", GetDelayMs(m.oldest_pending_send_ts)); + append("timeout_disconnects", m.coordinator_stats.conn_timeout_events); } if (should_enter("MEMORY")) { diff --git a/src/server/server_state.cc b/src/server/server_state.cc index 5f29c7271bae..11554d66598a 100644 --- a/src/server/server_state.cc +++ b/src/server/server_state.cc @@ -34,7 +34,7 @@ ServerState::Stats::Stats(unsigned num_shards) : tx_width_freq_arr(num_shards) { } ServerState::Stats& ServerState::Stats::Add(const ServerState::Stats& other) { - static_assert(sizeof(Stats) == 19 * 8, "Stats size mismatch"); + static_assert(sizeof(Stats) == 20 * 8, "Stats size mismatch"); #define ADD(x) this->x += (other.x) @@ -61,7 +61,7 @@ ServerState::Stats& ServerState::Stats::Add(const ServerState::Stats& other) { ADD(compressed_blobs); ADD(oom_error_cmd_cnt); - + ADD(conn_timeout_events); if (this->tx_width_freq_arr.size() > 0) { DCHECK_EQ(this->tx_width_freq_arr.size(), other.tx_width_freq_arr.size()); this->tx_width_freq_arr += other.tx_width_freq_arr; @@ -279,6 +279,7 @@ void ServerState::ConnectionsWatcherFb(util::ListenerInterface* main) { if (conn) { 
VLOG(1) << "Closing connection due to timeout: " << conn->GetClientInfo(); conn->ShutdownSelf(); + stats.conn_timeout_events++; } } } diff --git a/src/server/server_state.h b/src/server/server_state.h index 044cd6774c00..6d77e759d08b 100644 --- a/src/server/server_state.h +++ b/src/server/server_state.h @@ -131,6 +131,7 @@ class ServerState { // public struct - to allow initialization. // Number of times we rejected command dispatch due to OOM condition. uint64_t oom_error_cmd_cnt = 0; + uint32_t conn_timeout_events = 0; std::valarray<uint64_t> tx_width_freq_arr; }; diff --git a/tests/dragonfly/connection_test.py b/tests/dragonfly/connection_test.py index 224f79e77baa..c6dd949c6d69 100755 --- a/tests/dragonfly/connection_test.py +++ b/tests/dragonfly/connection_test.py @@ -1059,6 +1059,15 @@ async def test_timeout(df_server: DflyInstance, async_client: aioredis.Redis): await another_client.ping() clients = await async_client.client_list() assert len(clients) == 2 + await asyncio.sleep(2) - clients = await async_client.client_list() - assert len(clients) == 1 + + @assert_eventually + async def wait_for_conn_drop(): + clients = await async_client.client_list() + logging.info("clients: %s", clients) + assert len(clients) <= 1 + + await wait_for_conn_drop() + info = await async_client.info("clients") + assert int(info["timeout_disconnects"]) >= 1 \ No newline at end of file