Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use SO_LINGER with timeout 0 to avoid thousands of server sockets in TIME_WAIT when using small reconnect intervals #260

Merged
merged 4 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ jobs:
sudo apt-get install redis
sudo service redis-server stop

- name: Increase connection limit
run: |
sudo sysctl -w net.ipv4.tcp_fin_timeout=10
sudo sysctl -w net.ipv4.tcp_tw_reuse=1
ulimit -n 40960

- name: Generate TLS test certificates
if: matrix.platform == 'ubuntu-latest'
run: |
Expand Down
20 changes: 14 additions & 6 deletions shard_connection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,18 @@ int shard_connection::setup_socket(struct connect_info* addr) {
return -1;
}

// configure socket behavior
struct linger ling = {0, 0};
int flags = 1;

int error = setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, (void *) &flags, sizeof(flags));
assert(error == 0);

/*
* Configure socket behavior:
* If l_onoff is non-zero and l_linger is zero:
* The socket will discard any unsent data and the close() call will return immediately.
*/
struct linger ling;
ling.l_onoff = 1; // Enable SO_LINGER
ling.l_linger = 0; // Discard any unsent data and close immediately
error = setsockopt(sockfd, SOL_SOCKET, SO_LINGER, (void *) &ling, sizeof(ling));
assert(error == 0);

Expand Down Expand Up @@ -264,7 +270,7 @@ int shard_connection::connect(struct connect_info* addr) {
// setup socket
int sockfd = setup_socket(addr);
if (sockfd < 0) {
fprintf(stderr, "Failed to setup socket: %s", strerror(errno));
fprintf(stderr, "Failed to setup socket: %s\n", strerror(errno));
return -1;
}

Expand Down Expand Up @@ -478,8 +484,10 @@ void shard_connection::process_response(void)
// client manage connection & disconnection of shard
m_conns_manager->disconnect();
ret = m_conns_manager->connect();
assert(ret == 0);

if (ret != 0) {
benchmark_error_log("failed to reconnect.\n");
exit(1);
}
return;
}
}
Expand Down
29 changes: 29 additions & 0 deletions tests/tests_oss_simple_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,35 @@ def test_default_set_get_1_1(env):
# assert same number of gets and sets
env.assertEqual(merged_command_stats['cmdstat_set']['calls'], merged_command_stats['cmdstat_get']['calls'])

# run each test on different env
def test_short_reconnect_interval(env):
    # Runs memtier_benchmark with '--reconnect-interval=1' and checks the
    # outcome against the expected request count of the default config.
    # Skipped on cluster: cluster mode does not support the
    # reconnect-interval option.
    env.skipOnCluster()

    # Benchmark spec: test name plus the reconnect flag, then TLS arguments.
    specs = {"name": env.testName, "args": ['--reconnect-interval=1']}
    addTLSArgs(specs, env)

    # The expected request count is computed from the default memtier
    # config before that config is wrapped into a RunConfig below.
    memtier_config = get_default_memtier_config()
    masters = env.getMasterNodesList()
    expected_requests = get_expected_request_count(memtier_config)
    add_required_env_arguments(specs, memtier_config, env, masters)

    # The run is configured against a freshly created temporary directory.
    run_config = RunConfig(tempfile.mkdtemp(), env.testName, memtier_config, {})
    ensure_clean_benchmark_folder(run_config.results_dir)

    # run() returns True if the return code of memtier_benchmark was 0.
    succeeded = Benchmark.from_json(run_config, specs).run()

    # Aggregate the commandstats of the master nodes, starting from zeroed
    # set/get call counters, and validate the overall outcome.
    observed_requests = agg_info_commandstats(
        env.getOSSMasterNodesConnectionList(),
        {'cmdstat_set': {'calls': 0}, 'cmdstat_get': {'calls': 0}},
    )
    assert_minimum_memtier_outcomes(
        run_config, env, succeeded, expected_requests, observed_requests
    )


# run each test on different env
def test_default_set_get_3_runs(env):
run_count = 3
Expand Down
Loading