From cc2aee7f3bb557b2fb0683b83c6d11a50014e614 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 12:27:36 +0100 Subject: [PATCH 01/30] Add cets_test_node/cets_test_rpc helpers --- test/cets_SUITE.erl | 87 +++++++++-------------------------------- test/cets_test_node.erl | 37 ++++++++++++++++++ test/cets_test_rpc.erl | 52 ++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 68 deletions(-) create mode 100644 test/cets_test_node.erl create mode 100644 test/cets_test_rpc.erl diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 9235526..edd4187 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -12,6 +12,25 @@ -compile([export_all, nowarn_export_all]). +-import(cets_test_node, [ + block_node/2, + reconnect_node/2, + disconnect_node/2, + disconnect_node_by_name/2 +]). + +-import(cets_test_rpc, [ + rpc/4, + insert/3, + insert_many/3, + delete/3, + delete_request/3, + delete_many/3, + dump/2, + other_nodes/2, + join/4 +]). + all() -> [ {group, cets}, @@ -3009,46 +3028,6 @@ wait_for_name_to_be_free(Node, Name) -> %% Cleaner is fast, but not instant. cets_test_wait:wait_until(fun() -> rpc(Node, erlang, whereis, [Name]) end, undefined). -insert(Node, Tab, Rec) -> - rpc(Node, cets, insert, [Tab, Rec]). - -insert_many(Node, Tab, Records) -> - rpc(Node, cets, insert_many, [Tab, Records]). - -delete(Node, Tab, Key) -> - rpc(Node, cets, delete, [Tab, Key]). - -delete_request(Node, Tab, Key) -> - rpc(Node, cets, delete_request, [Tab, Key]). - -delete_many(Node, Tab, Keys) -> - rpc(Node, cets, delete_many, [Tab, Keys]). - -dump(Node, Tab) -> - rpc(Node, cets, dump, [Tab]). - -other_nodes(Node, Tab) -> - rpc(Node, cets, other_nodes, [Tab]). - -join(Node1, Tab, Pid1, Pid2) -> - rpc(Node1, cets_join, join, [lock1, #{table => Tab}, Pid1, Pid2]). 
- -%% Apply function using rpc or peer module -rpc(Peer, M, F, Args) when is_pid(Peer) -> - case peer:call(Peer, M, F, Args) of - {badrpc, Error} -> - ct:fail({badrpc, Error}); - Other -> - Other - end; -rpc(Node, M, F, Args) when is_atom(Node) -> - case rpc:call(Node, M, F, Args) of - {badrpc, Error} -> - ct:fail({badrpc, Error}); - Other -> - Other - end. - receive_message(M) -> receive M -> ok @@ -3241,34 +3220,6 @@ wait_for_down(Pid) -> after 5000 -> ct:fail({wait_for_down_timeout, Pid}) end. -%% Disconnect node until manually connected -block_node(Node, Peer) when is_atom(Node), is_pid(Peer) -> - rpc(Peer, erlang, set_cookie, [node(), invalid_cookie]), - disconnect_node(Peer, node()), - %% Wait till node() is notified about the disconnect - cets_test_wait:wait_until(fun() -> rpc(Peer, net_adm, ping, [node()]) end, pang), - cets_test_wait:wait_until(fun() -> rpc(node(), net_adm, ping, [Node]) end, pang). - -reconnect_node(Node, Peer) when is_atom(Node), is_pid(Peer) -> - rpc(Peer, erlang, set_cookie, [node(), erlang:get_cookie()]), - %% Very rarely it could return pang - cets_test_wait:wait_until(fun() -> rpc(Peer, net_adm, ping, [node()]) end, pong), - cets_test_wait:wait_until(fun() -> rpc(node(), net_adm, ping, [Node]) end, pong). - -disconnect_node(RPCNode, DisconnectNode) -> - rpc(RPCNode, erlang, disconnect_node, [DisconnectNode]). - -disconnect_node_by_name(Config, Id) -> - Peer = maps:get(Id, proplists:get_value(peers, Config)), - Node = maps:get(Id, proplists:get_value(nodes, Config)), - %% We could need to retry to disconnect, if the local node is currently trying to establish a connection - %% with Node2 (could be triggered by the previous tests) - F = fun() -> - disconnect_node(Peer, node()), - lists:member(Node, nodes()) - end, - cets_test_wait:wait_until(F, false). 
- not_leader(Leader, Other, Leader) -> Other; not_leader(Other, Leader, Leader) -> diff --git a/test/cets_test_node.erl b/test/cets_test_node.erl new file mode 100644 index 0000000..1f7421b --- /dev/null +++ b/test/cets_test_node.erl @@ -0,0 +1,37 @@ +-module(cets_test_node). +-export([ + block_node/2, + reconnect_node/2, + disconnect_node/2, + disconnect_node_by_name/2 +]). + +-import(cets_test_rpc, [rpc/4]). + +%% Disconnect node until manually connected +block_node(Node, Peer) when is_atom(Node), is_pid(Peer) -> + rpc(Peer, erlang, set_cookie, [node(), invalid_cookie]), + disconnect_node(Peer, node()), + %% Wait till node() is notified about the disconnect + cets_test_wait:wait_until(fun() -> rpc(Peer, net_adm, ping, [node()]) end, pang), + cets_test_wait:wait_until(fun() -> rpc(node(), net_adm, ping, [Node]) end, pang). + +reconnect_node(Node, Peer) when is_atom(Node), is_pid(Peer) -> + rpc(Peer, erlang, set_cookie, [node(), erlang:get_cookie()]), + %% Very rarely it could return pang + cets_test_wait:wait_until(fun() -> rpc(Peer, net_adm, ping, [node()]) end, pong), + cets_test_wait:wait_until(fun() -> rpc(node(), net_adm, ping, [Node]) end, pong). + +disconnect_node(RPCNode, DisconnectNode) -> + rpc(RPCNode, erlang, disconnect_node, [DisconnectNode]). + +disconnect_node_by_name(Config, Id) -> + Peer = maps:get(Id, proplists:get_value(peers, Config)), + Node = maps:get(Id, proplists:get_value(nodes, Config)), + %% We could need to retry to disconnect, if the local node is currently trying to establish a connection + %% with Node2 (could be triggered by the previous tests) + F = fun() -> + disconnect_node(Peer, node()), + lists:member(Node, nodes()) + end, + cets_test_wait:wait_until(F, false). diff --git a/test/cets_test_rpc.erl b/test/cets_test_rpc.erl new file mode 100644 index 0000000..971b350 --- /dev/null +++ b/test/cets_test_rpc.erl @@ -0,0 +1,52 @@ +-module(cets_test_rpc). 
+-export([ + rpc/4, + insert/3, + insert_many/3, + delete/3, + delete_request/3, + delete_many/3, + dump/2, + other_nodes/2, + join/4 +]). + +%% Apply function using rpc or peer module +rpc(Peer, M, F, Args) when is_pid(Peer) -> + case peer:call(Peer, M, F, Args) of + {badrpc, Error} -> + ct:fail({badrpc, Error}); + Other -> + Other + end; +rpc(Node, M, F, Args) when is_atom(Node) -> + case rpc:call(Node, M, F, Args) of + {badrpc, Error} -> + ct:fail({badrpc, Error}); + Other -> + Other + end. + +insert(Node, Tab, Rec) -> + rpc(Node, cets, insert, [Tab, Rec]). + +insert_many(Node, Tab, Records) -> + rpc(Node, cets, insert_many, [Tab, Records]). + +delete(Node, Tab, Key) -> + rpc(Node, cets, delete, [Tab, Key]). + +delete_request(Node, Tab, Key) -> + rpc(Node, cets, delete_request, [Tab, Key]). + +delete_many(Node, Tab, Keys) -> + rpc(Node, cets, delete_many, [Tab, Keys]). + +dump(Node, Tab) -> + rpc(Node, cets, dump, [Tab]). + +other_nodes(Node, Tab) -> + rpc(Node, cets, other_nodes, [Tab]). + +join(Node1, Tab, Pid1, Pid2) -> + rpc(Node1, cets_join, join, [lock1, #{table => Tab}, Pid1, Pid2]). From 72cf27b787bd4966f4deed5893d32e7079e85dab Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 12:51:18 +0100 Subject: [PATCH 02/30] Add cets_test_setup helper --- test/cets_SUITE.erl | 113 ++++++--------------------------------- test/cets_test_setup.erl | 99 ++++++++++++++++++++++++++++++++++ test/cets_test_wait.erl | 22 ++++++++ 3 files changed, 138 insertions(+), 96 deletions(-) create mode 100644 test/cets_test_setup.erl diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index edd4187..0bc7870 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -31,6 +31,18 @@ join/4 ]). +-import(cets_test_setup, [ + start/2, + start_local/1, + start_local/2, + start_disco/2, + start_simple_disco/0 +]). + +-import(cets_test_wait, [ + wait_for_down/1 +]). + all() -> [ {group, cets}, @@ -254,7 +266,7 @@ cets_seq_no_log_cases() -> ]. 
init_per_suite(Config) -> - init_cleanup_table(), + cets_test_setup:init_cleanup_table(), Names = [ct2, ct3, ct4, ct5, ct6, ct7], {Nodes, Peers} = lists:unzip([cets_test_peer:start_node(N) || N <- Names]), [ @@ -288,7 +300,7 @@ init_per_testcase_generic(Name, Config) -> [{testcase, Name} | Config]. end_per_testcase(_, _Config) -> - wait_for_cleanup(), + cets_test_setup:wait_for_cleanup(), ok. %% Modules that use a multiline LOG_ macro @@ -1317,7 +1329,7 @@ pause_on_remote_node_crashes(Config) -> Tab = make_name(Config), {ok, Pid1} = start(Node1, Tab), {ok, Pid2} = start(Node2, Tab), - ok = rpc(Node2, ?MODULE, mock_pause_on_remote_node_failing, []), + ok = rpc(Node2, cets_test_setup, mock_pause_on_remote_node_failing, []), try {error, {task_failed, @@ -2876,14 +2888,14 @@ cets_ping_non_existing_node(_Config) -> pang = cets_ping:ping('mongooseim@non_existing_host'). pre_connect_fails_on_our_node(_Config) -> - mock_epmd(), + cets_test_setup:mock_epmd(), %% We would fail to connect to the remote EPMD but we would get an IP pang = cets_ping:ping('mongooseim@resolvabletobadip'), meck:unload(). pre_connect_fails_on_one_of_the_nodes(Config) -> #{ct2 := Node2} = proplists:get_value(nodes, Config), - mock_epmd(), + cets_test_setup:mock_epmd(), %% We would get pong on Node2, but would fail an RPC to our hode pang = rpc(Node2, cets_ping, ping, ['cetsnode1@localhost']), History = meck:history(erl_epmd), @@ -2958,76 +2970,6 @@ start_link_local(Name, Opts) -> schedule_cleanup(Pid), {ok, Pid}. -start_local(Name) -> - start_local(Name, #{}). - -start_local(Name, Opts) -> - catch cets:stop(Name), - wait_for_name_to_be_free(node(), Name), - {ok, Pid} = cets:start(Name, Opts), - schedule_cleanup(Pid), - {ok, Pid}. 
- -schedule_cleanup(Pid) -> - Me = self(), - Cleaner = proc_lib:spawn(fun() -> - Ref = erlang:monitor(process, Me), - receive - {'DOWN', Ref, process, Me, _} -> - %% We do an RPC call, because erlang distribution - %% could not be always reliable (because we test netsplits) - rpc(cets_test_peer:node_to_peer(node(Pid)), cets, stop, [Pid]), - ets:delete_object(cleanup_table, {Me, self()}) - end - end), - ets:insert(cleanup_table, {Me, Cleaner}). - -init_cleanup_table() -> - spawn(fun() -> - ets:new(cleanup_table, [named_table, public, bag]), - timer:sleep(infinity) - end). - -%% schedule_cleanup is async, so this function is waiting for it to finish -wait_for_cleanup() -> - [ - wait_for_down(Cleaner) - || {Owner, Cleaner} <- ets:tab2list(cleanup_table), not is_process_alive(Owner) - ]. - -start(Node, Tab) -> - catch rpc(Node, cets, stop, [Tab]), - wait_for_name_to_be_free(Node, Tab), - {ok, Pid} = rpc(Node, cets, start, [Tab, #{}]), - schedule_cleanup(Pid), - {ok, Pid}. - -start_disco(Node, Opts) -> - case Opts of - #{name := Name} -> - catch rpc(Node, cets, stop, [Name]), - wait_for_name_to_be_free(Node, Name); - _ -> - ok - end, - {ok, Pid} = rpc(Node, cets_discovery, start, [Opts]), - schedule_cleanup(Pid), - Pid. - -start_simple_disco() -> - F = fun(State) -> - {{ok, []}, State} - end, - {ok, Pid} = cets_discovery:start_link(#{ - backend_module => cets_discovery_fun, get_nodes_fn => F - }), - Pid. - -wait_for_name_to_be_free(Node, Name) -> - %% Wait for the old process to be killed by the cleaner in schedule_cleanup. - %% Cleaner is fast, but not instant. - cets_test_wait:wait_until(fun() -> rpc(Node, erlang, whereis, [Name]) end, undefined). - receive_message(M) -> receive M -> ok @@ -3213,13 +3155,6 @@ get_message_queue_length(Pid) -> {message_queue_len, Len} = erlang:process_info(Pid, message_queue_len), Len. 
-wait_for_down(Pid) -> - Mon = erlang:monitor(process, Pid), - receive - {'DOWN', Mon, process, Pid, Reason} -> Reason - after 5000 -> ct:fail({wait_for_down_timeout, Pid}) - end. - not_leader(Leader, Other, Leader) -> Other; not_leader(Other, Leader, Leader) -> @@ -3312,20 +3247,6 @@ make_signalling_process() -> end end). -mock_epmd() -> - meck:new(erl_epmd, [passthrough, unstick]), - meck:expect(erl_epmd, address_please, fun - ("cetsnode1", "localhost", inet) -> {ok, {192, 168, 100, 134}}; - (Name, Host, Family) -> meck:passthrough([Name, Host, Family]) - end). - -mock_pause_on_remote_node_failing() -> - meck:new(cets_join, [passthrough, no_link]), - meck:expect(cets_join, pause_on_remote_node, fun(_JoinerPid, _AllPids) -> - error(mock_pause_on_remote_node_failing) - end), - ok. - %% Fails if List has duplicates assert_unique(List) -> ?assertEqual([], List -- lists:usort(List)), diff --git a/test/cets_test_setup.erl b/test/cets_test_setup.erl new file mode 100644 index 0000000..45afae7 --- /dev/null +++ b/test/cets_test_setup.erl @@ -0,0 +1,99 @@ +-module(cets_test_setup). +-export([ + mock_epmd/0, + mock_pause_on_remote_node_failing/0 +]). + +-export([ + start_local/1, + start_local/2, + start/2, + start_disco/2, + start_simple_disco/0 +]). + +-export([ + init_cleanup_table/0, + wait_for_cleanup/0 +]). + +-import(cets_test_rpc, [rpc/4]). + +mock_epmd() -> + meck:new(erl_epmd, [passthrough, unstick]), + meck:expect(erl_epmd, address_please, fun + ("cetsnode1", "localhost", inet) -> {ok, {192, 168, 100, 134}}; + (Name, Host, Family) -> meck:passthrough([Name, Host, Family]) + end). + +mock_pause_on_remote_node_failing() -> + meck:new(cets_join, [passthrough, no_link]), + meck:expect(cets_join, pause_on_remote_node, fun(_JoinerPid, _AllPids) -> + error(mock_pause_on_remote_node_failing) + end), + ok. + +start_local(Name) -> + start_local(Name, #{}). 
+ +start_local(Name, Opts) -> + catch cets:stop(Name), + cets_test_wait:wait_for_name_to_be_free(node(), Name), + {ok, Pid} = cets:start(Name, Opts), + schedule_cleanup(Pid), + {ok, Pid}. + +start(Node, Tab) -> + catch rpc(Node, cets, stop, [Tab]), + cets_test_wait:wait_for_name_to_be_free(Node, Tab), + {ok, Pid} = rpc(Node, cets, start, [Tab, #{}]), + schedule_cleanup(Pid), + {ok, Pid}. + +start_disco(Node, Opts) -> + case Opts of + #{name := Name} -> + catch rpc(Node, cets, stop, [Name]), + cets_test_wait:wait_for_name_to_be_free(Node, Name); + _ -> + ok + end, + {ok, Pid} = rpc(Node, cets_discovery, start, [Opts]), + schedule_cleanup(Pid), + Pid. + +start_simple_disco() -> + F = fun(State) -> + {{ok, []}, State} + end, + {ok, Pid} = cets_discovery:start_link(#{ + backend_module => cets_discovery_fun, get_nodes_fn => F + }), + Pid. + +schedule_cleanup(Pid) -> + Me = self(), + Cleaner = proc_lib:spawn(fun() -> + Ref = erlang:monitor(process, Me), + receive + {'DOWN', Ref, process, Me, _} -> + %% We do an RPC call, because erlang distribution + %% could not be always reliable (because we test netsplits) + rpc(cets_test_peer:node_to_peer(node(Pid)), cets, stop, [Pid]), + ets:delete_object(cleanup_table, {Me, self()}) + end + end), + ets:insert(cleanup_table, {Me, Cleaner}). + +init_cleanup_table() -> + spawn(fun() -> + ets:new(cleanup_table, [named_table, public, bag]), + timer:sleep(infinity) + end). + +%% schedule_cleanup is async, so this function is waiting for it to finish +wait_for_cleanup() -> + [ + cets_test_wait:wait_for_down(Cleaner) + || {Owner, Cleaner} <- ets:tab2list(cleanup_table), not is_process_alive(Owner) + ]. diff --git a/test/cets_test_wait.erl b/test/cets_test_wait.erl index 7cb5b7b..b34e82c 100644 --- a/test/cets_test_wait.erl +++ b/test/cets_test_wait.erl @@ -1,6 +1,12 @@ -module(cets_test_wait). -export([wait_until/2]). +%% Helpers +-export([ + wait_for_name_to_be_free/2, + wait_for_down/1 +]). 
+ %% From mongoose_helper %% @doc Waits `TimeLeft` for `Fun` to return `ExpectedValue` @@ -72,3 +78,19 @@ wait_and_continue( time_left => TimeLeft - SleepTime, history => [FunResult | History] }). + +%% Helpers + +wait_for_name_to_be_free(Node, Name) -> + %% Wait for the old process to be killed by the cleaner in schedule_cleanup. + %% Cleaner is fast, but not instant. + cets_test_wait:wait_until( + fun() -> cets_test_rpc:rpc(Node, erlang, whereis, [Name]) end, undefined + ). + +wait_for_down(Pid) -> + Mon = erlang:monitor(process, Pid), + receive + {'DOWN', Mon, process, Pid, Reason} -> Reason + after 5000 -> ct:fail({wait_for_down_timeout, Pid}) + end. From d994f1a45311042dc7633847b3af9a6003778ed2 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 13:08:33 +0100 Subject: [PATCH 03/30] Move setup functions into cets_test_setup --- test/cets_SUITE.erl | 163 ++++----------------------------------- test/cets_test_setup.erl | 158 ++++++++++++++++++++++++++++++++++++- test/cets_test_wait.erl | 21 ++++- 3 files changed, 192 insertions(+), 150 deletions(-) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 0bc7870..5e82b16 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -36,11 +36,24 @@ start_local/1, start_local/2, start_disco/2, - start_simple_disco/0 + start_simple_disco/0, + make_name/1, + make_name/2, + lock_name/1, + disco_name/1, + given_two_joined_tables/1, + given_two_joined_tables/2, + given_3_servers/1, + given_3_servers/2, + given_n_servers/3, + setup_two_nodes_and_discovery/1, + setup_two_nodes_and_discovery/2, + simulate_disco_restart/1 ]). -import(cets_test_wait, [ - wait_for_down/1 + wait_for_down/1, + wait_for_ready/2 ]). 
all() -> @@ -893,7 +906,7 @@ join_fails_before_send_dump_and_there_are_pending_remote_ops(Config) -> receive_message(before_send_dump_called_for_pid1), cets:insert_request(Pid1, {1}), %% Check that the remote_op has reached Pid2 message box - cets_test_wait:wait_until(fun() -> count_remote_ops_in_the_message_box(Pid2) end, 1), + cets_test_wait:wait_for_remote_ops_in_the_message_box(Pid2, 1), sys:resume(Pid2), %% Wait till remote_op is processed cets:ping(Pid2), @@ -2990,29 +3003,6 @@ flush_message(M) -> ok end. -make_name(Config) -> - make_name(Config, 1). - -make_name(Config, Num) when is_integer(Num) -> - Testcase = proplists:get_value(testcase, Config), - list_to_atom(atom_to_list(Testcase) ++ "_" ++ integer_to_list(Num)); -make_name(Config, Atom) when is_atom(Atom) -> - Testcase = proplists:get_value(testcase, Config), - list_to_atom(atom_to_list(Testcase) ++ "_" ++ atom_to_list(Atom)). - -lock_name(Config) -> - Testcase = proplists:get_value(testcase, Config), - list_to_atom(atom_to_list(Testcase) ++ "_lock"). - -disco_name(Config) -> - Testcase = proplists:get_value(testcase, Config), - list_to_atom(atom_to_list(Testcase) ++ "_disco"). - -count_remote_ops_in_the_message_box(Pid) -> - {messages, Messages} = erlang:process_info(Pid, messages), - Ops = [M || M <- Messages, element(1, M) =:= remote_op], - length(Ops). - set_join_ref(Pid, JoinRef) -> sys:replace_state(Pid, fun(#{join_ref := _} = State) -> State#{join_ref := JoinRef} end). @@ -3021,118 +3011,6 @@ set_other_servers(Pid, Servers) -> State#{other_servers := Servers} end). -given_two_joined_tables(Config) -> - given_two_joined_tables(Config, #{}). 
- -given_two_joined_tables(Config, Opts) -> - Tab1 = make_name(Config, 1), - Tab2 = make_name(Config, 2), - {ok, Pid1} = start_local(Tab1, Opts), - {ok, Pid2} = start_local(Tab2, Opts), - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2), - #{ - tab1 => Tab1, - tab2 => Tab2, - pid1 => Pid1, - pid2 => Pid2, - tabs => [Tab1, Tab2], - pids => [Pid1, Pid2] - }. - -given_3_servers(Config) -> - given_3_servers(Config, #{}). - -given_3_servers(Config, Opts) -> - given_n_servers(Config, 3, Opts). - -given_n_servers(Config, N, Opts) -> - Tabs = [make_name(Config, X) || X <- lists:seq(1, N)], - Pids = [ - begin - {ok, Pid} = start_local(Tab, Opts), - Pid - end - || Tab <- Tabs - ], - #{pids => Pids, tabs => Tabs}. - -setup_two_nodes_and_discovery(Config) -> - setup_two_nodes_and_discovery(Config, []). - -%% Flags: -%% - disco2 - start discovery on Node2 -%% - wait - call wait_for_ready/2 -setup_two_nodes_and_discovery(Config, Flags) -> - Me = self(), - Node1 = node(), - #{ct2 := Peer2} = proplists:get_value(peers, Config), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - disconnect_node_by_name(Config, ct2), - Tab = make_name(Config), - {ok, _Pid1} = start(Node1, Tab), - {ok, _Pid2} = start(Peer2, Tab), - F = fun(State) -> - case lists:member(notify_get_nodes, Flags) of - true -> - Me ! 
get_nodes; - false -> - ok - end, - {{ok, [Node1, Node2]}, State} - end, - DiscoName = disco_name(Config), - DiscoOpts = #{ - name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F - }, - Disco = start_disco(Node1, DiscoOpts), - %% Start Disco on second node (it is not always needed) - Res = - case lists:member(disco2, Flags) of - true -> - Disco2 = start_disco(Node2, DiscoOpts), - cets_discovery:add_table(Disco2, Tab), - #{disco2 => Disco2}; - false -> - #{} - end, - cets_discovery:add_table(Disco, Tab), - case lists:member(wait, Flags) of - true -> - wait_for_ready(Disco, 5000); - false -> - ok - end, - case lists:member(netsplit, Flags) of - true -> - %% Simulate a loss of connection between nodes - disconnect_node_by_name(Config, ct2); - false -> - ok - end, - Res#{ - disco_name => DiscoName, - disco_opts => DiscoOpts, - disco => Disco, - node1 => Node1, - node2 => Node2, - peer2 => Peer2 - }. - -simulate_disco_restart(#{ - disco_opts := DiscoOpts, - disco2 := Disco2, - node1 := Node1, - node2 := Node2, - peer2 := Peer2 -}) -> - %% Instead of restart the node, restart the process. It is enough to get - %% a new start_time. - disconnect_node(Peer2, Node1), - rpc(Peer2, cets, stop, [Disco2]), - %% We actually would not detect the case of us just stopping the remote disco - %% server. Because we use nodeup/nodedown to detect downs, not monitors. - _RestartedDisco2 = start_disco(Node2, DiscoOpts). - stopped_pid() -> %% Get a pid for a stopped process {Pid, Mon} = spawn_monitor(fun() -> ok end), @@ -3259,15 +3137,6 @@ make_process() -> end end). -wait_for_ready(Disco, Timeout) -> - try - ok = cets_discovery:wait_for_ready(Disco, Timeout) - catch - Class:Reason:Stacktrace -> - ct:pal("system_info: ~p", [cets_discovery:system_info(Disco)]), - erlang:raise(Class, Reason, Stacktrace) - end. 
- %% Overwrites nodedown timestamp for the Node in the discovery server state set_nodedown_timestamp(Disco, Node, NewTimestamp) -> sys:replace_state(Disco, fun(#{nodedown_timestamps := Map} = State) -> diff --git a/test/cets_test_setup.erl b/test/cets_test_setup.erl index 45afae7..c92cdee 100644 --- a/test/cets_test_setup.erl +++ b/test/cets_test_setup.erl @@ -4,6 +4,11 @@ mock_pause_on_remote_node_failing/0 ]). +-export([ + init_cleanup_table/0, + wait_for_cleanup/0 +]). + -export([ start_local/1, start_local/2, @@ -13,8 +18,27 @@ ]). -export([ - init_cleanup_table/0, - wait_for_cleanup/0 + make_name/1, + make_name/2, + lock_name/1, + disco_name/1 +]). + +-export([ + given_two_joined_tables/1, + given_two_joined_tables/2, + given_3_servers/1, + given_3_servers/2, + given_n_servers/3, + setup_two_nodes_and_discovery/1, + setup_two_nodes_and_discovery/2 +]). + +-export([simulate_disco_restart/1]). + +-import(cets_test_node, [ + disconnect_node/2, + disconnect_node_by_name/2 ]). -import(cets_test_rpc, [rpc/4]). @@ -97,3 +121,133 @@ wait_for_cleanup() -> cets_test_wait:wait_for_down(Cleaner) || {Owner, Cleaner} <- ets:tab2list(cleanup_table), not is_process_alive(Owner) ]. + +make_name(Config) -> + make_name(Config, 1). + +make_name(Config, Num) when is_integer(Num) -> + Testcase = proplists:get_value(testcase, Config), + list_to_atom(atom_to_list(Testcase) ++ "_" ++ integer_to_list(Num)); +make_name(Config, Atom) when is_atom(Atom) -> + Testcase = proplists:get_value(testcase, Config), + list_to_atom(atom_to_list(Testcase) ++ "_" ++ atom_to_list(Atom)). + +lock_name(Config) -> + Testcase = proplists:get_value(testcase, Config), + list_to_atom(atom_to_list(Testcase) ++ "_lock"). + +disco_name(Config) -> + Testcase = proplists:get_value(testcase, Config), + list_to_atom(atom_to_list(Testcase) ++ "_disco"). + +given_two_joined_tables(Config) -> + given_two_joined_tables(Config, #{}). 
+ +given_two_joined_tables(Config, Opts) -> + Tab1 = make_name(Config, 1), + Tab2 = make_name(Config, 2), + {ok, Pid1} = start_local(Tab1, Opts), + {ok, Pid2} = start_local(Tab2, Opts), + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2), + #{ + tab1 => Tab1, + tab2 => Tab2, + pid1 => Pid1, + pid2 => Pid2, + tabs => [Tab1, Tab2], + pids => [Pid1, Pid2] + }. + +given_3_servers(Config) -> + given_3_servers(Config, #{}). + +given_3_servers(Config, Opts) -> + given_n_servers(Config, 3, Opts). + +given_n_servers(Config, N, Opts) -> + Tabs = [make_name(Config, X) || X <- lists:seq(1, N)], + Pids = [ + begin + {ok, Pid} = start_local(Tab, Opts), + Pid + end + || Tab <- Tabs + ], + #{pids => Pids, tabs => Tabs}. + +setup_two_nodes_and_discovery(Config) -> + setup_two_nodes_and_discovery(Config, []). + +%% Flags: +%% - disco2 - start discovery on Node2 +%% - wait - call wait_for_ready/2 +setup_two_nodes_and_discovery(Config, Flags) -> + Me = self(), + Node1 = node(), + #{ct2 := Peer2} = proplists:get_value(peers, Config), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + disconnect_node_by_name(Config, ct2), + Tab = make_name(Config), + {ok, _Pid1} = start(Node1, Tab), + {ok, _Pid2} = start(Peer2, Tab), + F = fun(State) -> + case lists:member(notify_get_nodes, Flags) of + true -> + Me ! 
get_nodes; + false -> + ok + end, + {{ok, [Node1, Node2]}, State} + end, + DiscoName = disco_name(Config), + DiscoOpts = #{ + name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F + }, + Disco = start_disco(Node1, DiscoOpts), + %% Start Disco on second node (it is not always needed) + Res = + case lists:member(disco2, Flags) of + true -> + Disco2 = start_disco(Node2, DiscoOpts), + cets_discovery:add_table(Disco2, Tab), + #{disco2 => Disco2}; + false -> + #{} + end, + cets_discovery:add_table(Disco, Tab), + case lists:member(wait, Flags) of + true -> + cets_test_wait:wait_for_ready(Disco, 5000); + false -> + ok + end, + case lists:member(netsplit, Flags) of + true -> + %% Simulate a loss of connection between nodes + disconnect_node_by_name(Config, ct2); + false -> + ok + end, + Res#{ + disco_name => DiscoName, + disco_opts => DiscoOpts, + disco => Disco, + node1 => Node1, + node2 => Node2, + peer2 => Peer2 + }. + +simulate_disco_restart(#{ + disco_opts := DiscoOpts, + disco2 := Disco2, + node1 := Node1, + node2 := Node2, + peer2 := Peer2 +}) -> + %% Instead of restart the node, restart the process. It is enough to get + %% a new start_time. + disconnect_node(Peer2, Node1), + rpc(Peer2, cets, stop, [Disco2]), + %% We actually would not detect the case of us just stopping the remote disco + %% server. Because we use nodeup/nodedown to detect downs, not monitors. + _RestartedDisco2 = start_disco(Node2, DiscoOpts). diff --git a/test/cets_test_wait.erl b/test/cets_test_wait.erl index b34e82c..42e03b4 100644 --- a/test/cets_test_wait.erl +++ b/test/cets_test_wait.erl @@ -4,7 +4,9 @@ %% Helpers -export([ wait_for_name_to_be_free/2, - wait_for_down/1 + wait_for_down/1, + wait_for_remote_ops_in_the_message_box/2, + wait_for_ready/2 ]). %% From mongoose_helper @@ -94,3 +96,20 @@ wait_for_down(Pid) -> {'DOWN', Mon, process, Pid, Reason} -> Reason after 5000 -> ct:fail({wait_for_down_timeout, Pid}) end. 
+ +wait_for_remote_ops_in_the_message_box(Pid, Count) -> + cets_test_wait:wait_until(fun() -> count_remote_ops_in_the_message_box(Pid) end, Count). + +count_remote_ops_in_the_message_box(Pid) -> + {messages, Messages} = erlang:process_info(Pid, messages), + Ops = [M || M <- Messages, element(1, M) =:= remote_op], + length(Ops). + +wait_for_ready(Disco, Timeout) -> + try + ok = cets_discovery:wait_for_ready(Disco, Timeout) + catch + Class:Reason:Stacktrace -> + ct:pal("system_info: ~p", [cets_discovery:system_info(Disco)]), + erlang:raise(Class, Reason, Stacktrace) + end. From ff5777d4febe0690339e252f8a1b9a4e51583fe5 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 13:18:10 +0100 Subject: [PATCH 04/30] Move wait helpers to cets_test_wait --- test/cets_SUITE.erl | 42 +++++++-------------------------------- test/cets_test_helper.erl | 7 +++++++ test/cets_test_wait.erl | 33 +++++++++++++++++++++++++++++- 3 files changed, 46 insertions(+), 36 deletions(-) create mode 100644 test/cets_test_helper.erl diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 5e82b16..a272a1f 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -53,7 +53,11 @@ -import(cets_test_wait, [ wait_for_down/1, - wait_for_ready/2 + wait_for_ready/2, + wait_for_disco_timestamp_to_appear/3, + wait_for_disco_timestamp_to_be_updated/4, + wait_for_unpaused/3, + wait_for_join_ref_to_match/2 ]). all() -> @@ -2842,7 +2846,7 @@ disco_nodeup_timestamp_is_updated_after_node_reconnects(Config) -> logger_debug_h:start(#{id => ?FUNCTION_NAME}), Setup = setup_two_nodes_and_discovery(Config, [wait, disco2]), #{disco := Disco, node2 := Node2} = Setup, - OldTimestamp = get_disco_timestamp(Disco, nodeup_timestamps, Node2), + OldTimestamp = cets_test_helper:get_disco_timestamp(Disco, nodeup_timestamps, Node2), disconnect_node_by_name(Config, ct2), wait_for_disco_timestamp_to_be_updated(Disco, nodeup_timestamps, Node2, OldTimestamp). 
@@ -2850,7 +2854,7 @@ disco_node_start_timestamp_is_updated_after_node_restarts(Config) -> logger_debug_h:start(#{id => ?FUNCTION_NAME}), Setup = setup_two_nodes_and_discovery(Config, [wait, disco2]), #{disco := Disco, node2 := Node2} = Setup, - OldTimestamp = get_disco_timestamp(Disco, node_start_timestamps, Node2), + OldTimestamp = cets_test_helper:get_disco_timestamp(Disco, node_start_timestamps, Node2), simulate_disco_restart(Setup), wait_for_disco_timestamp_to_be_updated(Disco, node_start_timestamps, Node2, OldTimestamp). @@ -3086,38 +3090,6 @@ test_data_for_duplicate_missing_table_in_status(Config) -> return_same(X) -> X. -wait_for_disco_timestamp_to_appear(Disco, MapName, NodeKey) -> - F = fun() -> - #{MapName := Map} = cets_discovery:system_info(Disco), - maps:is_key(NodeKey, Map) - end, - cets_test_wait:wait_until(F, true). - -wait_for_disco_timestamp_to_be_updated(Disco, MapName, NodeKey, OldTimestamp) -> - Cond = fun() -> - NewTimestamp = get_disco_timestamp(Disco, MapName, NodeKey), - NewTimestamp =/= OldTimestamp - end, - cets_test_wait:wait_until(Cond, true). - -wait_for_unpaused(Peer, Pid, PausedByPid) -> - Cond = fun() -> - {monitors, Info} = rpc(Peer, erlang, process_info, [Pid, monitors]), - lists:member({process, PausedByPid}, Info) - end, - cets_test_wait:wait_until(Cond, false). - -wait_for_join_ref_to_match(Pid, JoinRef) -> - Cond = fun() -> - maps:get(join_ref, cets:info(Pid)) - end, - cets_test_wait:wait_until(Cond, JoinRef). - -get_disco_timestamp(Disco, MapName, NodeKey) -> - Info = cets_discovery:system_info(Disco), - #{MapName := #{NodeKey := Timestamp}} = Info, - Timestamp. - make_signalling_process() -> proc_lib:spawn_link(fun() -> receive diff --git a/test/cets_test_helper.erl b/test/cets_test_helper.erl new file mode 100644 index 0000000..fb524ec --- /dev/null +++ b/test/cets_test_helper.erl @@ -0,0 +1,7 @@ +-module(cets_test_helper). +-export([get_disco_timestamp/3]). 
+ +get_disco_timestamp(Disco, MapName, NodeKey) -> + Info = cets_discovery:system_info(Disco), + #{MapName := #{NodeKey := Timestamp}} = Info, + Timestamp. diff --git a/test/cets_test_wait.erl b/test/cets_test_wait.erl index 42e03b4..c26ef9d 100644 --- a/test/cets_test_wait.erl +++ b/test/cets_test_wait.erl @@ -6,7 +6,11 @@ wait_for_name_to_be_free/2, wait_for_down/1, wait_for_remote_ops_in_the_message_box/2, - wait_for_ready/2 + wait_for_ready/2, + wait_for_disco_timestamp_to_appear/3, + wait_for_disco_timestamp_to_be_updated/4, + wait_for_unpaused/3, + wait_for_join_ref_to_match/2 ]). %% From mongoose_helper @@ -113,3 +117,30 @@ wait_for_ready(Disco, Timeout) -> ct:pal("system_info: ~p", [cets_discovery:system_info(Disco)]), erlang:raise(Class, Reason, Stacktrace) end. + +wait_for_disco_timestamp_to_appear(Disco, MapName, NodeKey) -> + F = fun() -> + #{MapName := Map} = cets_discovery:system_info(Disco), + maps:is_key(NodeKey, Map) + end, + cets_test_wait:wait_until(F, true). + +wait_for_disco_timestamp_to_be_updated(Disco, MapName, NodeKey, OldTimestamp) -> + Cond = fun() -> + NewTimestamp = cets_test_helper:get_disco_timestamp(Disco, MapName, NodeKey), + NewTimestamp =/= OldTimestamp + end, + cets_test_wait:wait_until(Cond, true). + +wait_for_unpaused(Peer, Pid, PausedByPid) -> + Cond = fun() -> + {monitors, Info} = cets_test_rpc:rpc(Peer, erlang, process_info, [Pid, monitors]), + lists:member({process, PausedByPid}, Info) + end, + cets_test_wait:wait_until(Cond, false). + +wait_for_join_ref_to_match(Pid, JoinRef) -> + Cond = fun() -> + maps:get(join_ref, cets:info(Pid)) + end, + cets_test_wait:wait_until(Cond, JoinRef). 
From 4445987ec1a5549223aaee5522fa7e31d81079a4 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 13:22:04 +0100 Subject: [PATCH 05/30] Move wait_till_test_stage/wait_till_message_queue_length into cets_test_wait --- test/cets_SUITE.erl | 14 +++----------- test/cets_test_wait.erl | 18 +++++++++++++++++- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index a272a1f..3828c8e 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -57,7 +57,9 @@ wait_for_disco_timestamp_to_appear/3, wait_for_disco_timestamp_to_be_updated/4, wait_for_unpaused/3, - wait_for_join_ref_to_match/2 + wait_for_join_ref_to_match/2, + wait_till_test_stage/2, + wait_till_message_queue_length/2 ]). all() -> @@ -3027,16 +3029,6 @@ get_pd(Pid, Key) -> {dictionary, Dict} = erlang:process_info(Pid, dictionary), proplists:get_value(Key, Dict). -wait_till_test_stage(Pid, Stage) -> - cets_test_wait:wait_until(fun() -> get_pd(Pid, test_stage) end, Stage). - -wait_till_message_queue_length(Pid, Len) -> - cets_test_wait:wait_until(fun() -> get_message_queue_length(Pid) end, Len). - -get_message_queue_length(Pid) -> - {message_queue_len, Len} = erlang:process_info(Pid, message_queue_len), - Len. - not_leader(Leader, Other, Leader) -> Other; not_leader(Other, Leader, Leader) -> diff --git a/test/cets_test_wait.erl b/test/cets_test_wait.erl index c26ef9d..4e29344 100644 --- a/test/cets_test_wait.erl +++ b/test/cets_test_wait.erl @@ -10,7 +10,9 @@ wait_for_disco_timestamp_to_appear/3, wait_for_disco_timestamp_to_be_updated/4, wait_for_unpaused/3, - wait_for_join_ref_to_match/2 + wait_for_join_ref_to_match/2, + wait_till_test_stage/2, + wait_till_message_queue_length/2 ]). %% From mongoose_helper @@ -144,3 +146,17 @@ wait_for_join_ref_to_match(Pid, JoinRef) -> maps:get(join_ref, cets:info(Pid)) end, cets_test_wait:wait_until(Cond, JoinRef). 
+ +get_pd(Pid, Key) -> + {dictionary, Dict} = erlang:process_info(Pid, dictionary), + proplists:get_value(Key, Dict). + +wait_till_test_stage(Pid, Stage) -> + cets_test_wait:wait_until(fun() -> get_pd(Pid, test_stage) end, Stage). + +wait_till_message_queue_length(Pid, Len) -> + cets_test_wait:wait_until(fun() -> get_message_queue_length(Pid) end, Len). + +get_message_queue_length(Pid) -> + {message_queue_len, Len} = erlang:process_info(Pid, message_queue_len), + Len. From c4f5a004fb0e9844b84d85848c2ee63ea5fd7dee Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 13:27:33 +0100 Subject: [PATCH 06/30] Create cets_test_receive helper --- test/cets_SUITE.erl | 79 +++++++++++--------------------------- test/cets_test_receive.erl | 47 +++++++++++++++++++++++ 2 files changed, 69 insertions(+), 57 deletions(-) create mode 100644 test/cets_test_receive.erl diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 3828c8e..189212d 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -62,6 +62,14 @@ wait_till_message_queue_length/2 ]). +-import(cets_test_receive, [ + receive_message/1, + receive_message_with_arg/1, + flush_message/1, + receive_all_logs/1, + assert_nothing_is_logged/2 +]). + all() -> [ {group, cets}, @@ -2965,14 +2973,6 @@ ping_pairs_returns_earlier(Config) -> %% Helper functions -receive_all_logs(Id) -> - receive - {log, Id, Log} -> - [Log | receive_all_logs(Id)] - after 100 -> - [] - end. - still_works(Pid) -> pong = cets:ping(Pid), %% The server works fine @@ -2989,26 +2989,6 @@ start_link_local(Name, Opts) -> schedule_cleanup(Pid), {ok, Pid}. -receive_message(M) -> - receive - M -> ok - after 5000 -> error({receive_message_timeout, M}) - end. - -receive_message_with_arg(Tag) -> - receive - {Tag, Arg} -> Arg - after 5000 -> error({receive_message_with_arg_timeout, Tag}) - end. - -flush_message(M) -> - receive - M -> - flush_message(M) - after 0 -> - ok - end. 
- set_join_ref(Pid, JoinRef) -> sys:replace_state(Pid, fun(#{join_ref := _} = State) -> State#{join_ref := JoinRef} end). @@ -3017,6 +2997,12 @@ set_other_servers(Pid, Servers) -> State#{other_servers := Servers} end). +%% Overwrites nodedown timestamp for the Node in the discovery server state +set_nodedown_timestamp(Disco, Node, NewTimestamp) -> + sys:replace_state(Disco, fun(#{nodedown_timestamps := Map} = State) -> + State#{nodedown_timestamps := maps:put(Node, NewTimestamp, Map)} + end). + stopped_pid() -> %% Get a pid for a stopped process {Pid, Mon} = spawn_monitor(fun() -> ok end), @@ -3025,15 +3011,6 @@ stopped_pid() -> end, Pid. -get_pd(Pid, Key) -> - {dictionary, Dict} = erlang:process_info(Pid, dictionary), - proplists:get_value(Key, Dict). - -not_leader(Leader, Other, Leader) -> - Other; -not_leader(Other, Leader, Leader) -> - Other. - bad_node_pid() -> binary_to_term(bad_node_pid_binary()). @@ -3042,16 +3019,10 @@ bad_node_pid_binary() -> <<131, 88, 100, 0, 17, 98, 97, 100, 110, 111, 100, 101, 64, 108, 111, 99, 97, 108, 104, 111, 115, 116, 0, 0, 0, 90, 0, 0, 0, 0, 100, 206, 70, 92>>. -assert_nothing_is_logged(LogHandlerId, LogRef) -> - receive - {log, LogHandlerId, #{ - level := Level, - msg := {report, #{log_ref := LogRef}} - }} when Level =:= warning; Level =:= error -> - ct:fail(got_logging_but_should_not) - after 0 -> - ok - end. +%% Fails if List has duplicates +assert_unique(List) -> + ?assertEqual([], List -- lists:usort(List)), + List. send_join_start_back_and_wait_for_continue_joining() -> Me = self(), @@ -3089,11 +3060,6 @@ make_signalling_process() -> end end). -%% Fails if List has duplicates -assert_unique(List) -> - ?assertEqual([], List -- lists:usort(List)), - List. - make_process() -> proc_lib:spawn(fun() -> receive @@ -3101,8 +3067,7 @@ make_process() -> end end). 
-%% Overwrites nodedown timestamp for the Node in the discovery server state -set_nodedown_timestamp(Disco, Node, NewTimestamp) -> - sys:replace_state(Disco, fun(#{nodedown_timestamps := Map} = State) -> - State#{nodedown_timestamps := maps:put(Node, NewTimestamp, Map)} - end). +not_leader(Leader, Other, Leader) -> + Other; +not_leader(Other, Leader, Leader) -> + Other. diff --git a/test/cets_test_receive.erl b/test/cets_test_receive.erl new file mode 100644 index 0000000..6fd2116 --- /dev/null +++ b/test/cets_test_receive.erl @@ -0,0 +1,47 @@ +-module(cets_test_receive). +-export([ + receive_message/1, + receive_message_with_arg/1, + flush_message/1, + receive_all_logs/1, + assert_nothing_is_logged/2 +]). + +receive_message(M) -> + receive + M -> ok + after 5000 -> error({receive_message_timeout, M}) + end. + +receive_message_with_arg(Tag) -> + receive + {Tag, Arg} -> Arg + after 5000 -> error({receive_message_with_arg_timeout, Tag}) + end. + +flush_message(M) -> + receive + M -> + flush_message(M) + after 0 -> + ok + end. + +receive_all_logs(Id) -> + receive + {log, Id, Log} -> + [Log | receive_all_logs(Id)] + after 100 -> + [] + end. + +assert_nothing_is_logged(LogHandlerId, LogRef) -> + receive + {log, LogHandlerId, #{ + level := Level, + msg := {report, #{log_ref := LogRef}} + }} when Level =:= warning; Level =:= error -> + ct:fail(got_logging_but_should_not) + after 0 -> + ok + end. From 616243c85b3cb2eaab2c66fbc8a7d5dc21388e0d Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 13:32:48 +0100 Subject: [PATCH 07/30] Move make_process() into cets_test_setup --- test/cets_SUITE.erl | 21 ++++++--------------- test/cets_test_setup.erl | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 189212d..c46cbee 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -51,6 +51,11 @@ simulate_disco_restart/1 ]). 
+-import(cets_test_setup, [ + make_signalling_process/0, + make_process/0 +]). + -import(cets_test_wait, [ wait_for_down/1, wait_for_ready/2, @@ -1332,7 +1337,7 @@ ignore_send_dump_received_when_unpaused(Config) -> pause_on_remote_node_returns_if_monitor_process_dies(Config) -> JoinPid = make_process(), #{ct2 := Node2} = proplists:get_value(nodes, Config), - AllPids = [rpc(Node2, ?MODULE, make_process, [])], + AllPids = [rpc(Node2, cets_test_setup, make_process, [])], TestPid = proc_lib:spawn(fun() -> %% Would block cets_join:pause_on_remote_node(JoinPid, AllPids) @@ -3053,20 +3058,6 @@ test_data_for_duplicate_missing_table_in_status(Config) -> return_same(X) -> X. -make_signalling_process() -> - proc_lib:spawn_link(fun() -> - receive - stop -> ok - end - end). - -make_process() -> - proc_lib:spawn(fun() -> - receive - stop -> stop - end - end). - not_leader(Leader, Other, Leader) -> Other; not_leader(Other, Leader, Leader) -> diff --git a/test/cets_test_setup.erl b/test/cets_test_setup.erl index c92cdee..c0b8a9f 100644 --- a/test/cets_test_setup.erl +++ b/test/cets_test_setup.erl @@ -36,6 +36,11 @@ -export([simulate_disco_restart/1]). +-export([ + make_signalling_process/0, + make_process/0 +]). + -import(cets_test_node, [ disconnect_node/2, disconnect_node_by_name/2 @@ -251,3 +256,17 @@ simulate_disco_restart(#{ %% We actually would not detect the case of us just stopping the remote disco %% server. Because we use nodeup/nodedown to detect downs, not monitors. _RestartedDisco2 = start_disco(Node2, DiscoOpts). + +make_signalling_process() -> + proc_lib:spawn_link(fun() -> + receive + stop -> ok + end + end). + +make_process() -> + proc_lib:spawn(fun() -> + receive + stop -> stop + end + end). 
From 2c9130ec7c2d0bc369c5d060c37406e267af0976 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 14:14:39 +0100 Subject: [PATCH 08/30] Move test cases into cets_disco_SUITE --- test/cets_SUITE.erl | 188 +--------------------------- test/cets_disco_SUITE.erl | 257 ++++++++++++++++++++++++++++++++++++++ test/cets_test_helper.erl | 12 +- test/cets_test_node.erl | 37 ------ test/cets_test_peer.erl | 56 +++++++++ test/cets_test_setup.erl | 10 +- 6 files changed, 338 insertions(+), 222 deletions(-) create mode 100644 test/cets_disco_SUITE.erl delete mode 100644 test/cets_test_node.erl diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index c46cbee..86286e8 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -12,11 +12,10 @@ -compile([export_all, nowarn_export_all]). --import(cets_test_node, [ +-import(cets_test_peer, [ block_node/2, reconnect_node/2, - disconnect_node/2, - disconnect_node_by_name/2 + disconnect_node/2 ]). -import(cets_test_rpc, [ @@ -45,10 +44,7 @@ given_two_joined_tables/2, given_3_servers/1, given_3_servers/2, - given_n_servers/3, - setup_two_nodes_and_discovery/1, - setup_two_nodes_and_discovery/2, - simulate_disco_restart/1 + given_n_servers/3 ]). -import(cets_test_setup, [ @@ -59,8 +55,6 @@ -import(cets_test_wait, [ wait_for_down/1, wait_for_ready/2, - wait_for_disco_timestamp_to_appear/3, - wait_for_disco_timestamp_to_be_updated/4, wait_for_unpaused/3, wait_for_join_ref_to_match/2, wait_till_test_stage/2, @@ -70,11 +64,12 @@ -import(cets_test_receive, [ receive_message/1, receive_message_with_arg/1, - flush_message/1, receive_all_logs/1, assert_nothing_is_logged/2 ]). +-import(cets_test_helper, [assert_unique/1]). 
+ all() -> [ {group, cets}, @@ -265,16 +260,6 @@ seq_cases() -> logging_when_failing_join_with_disco, cets_ping_all_returns_when_ping_crashes, join_interrupted_when_ping_crashes, - disco_logs_nodeup, - disco_logs_nodedown, - disco_logs_nodeup_after_downtime, - disco_logs_node_reconnects_after_downtime, - disco_node_up_timestamp_is_remembered, - disco_node_down_timestamp_is_remembered, - disco_nodeup_timestamp_is_updated_after_node_reconnects, - disco_node_start_timestamp_is_updated_after_node_restarts, - disco_late_pang_result_arrives_after_node_went_up, - disco_nodeup_triggers_check_and_get_nodes, ping_pairs_returns_pongs, ping_pairs_returns_earlier, pre_connect_fails_on_our_node, @@ -288,11 +273,6 @@ cets_seq_no_log_cases() -> [ join_interrupted_when_ping_crashes, node_down_history_is_updated_when_netsplit_happens, - disco_node_up_timestamp_is_remembered, - disco_node_down_timestamp_is_remembered, - disco_nodeup_timestamp_is_updated_after_node_reconnects, - disco_node_start_timestamp_is_updated_after_node_restarts, - disco_late_pang_result_arrives_after_node_went_up, send_check_servers_is_called_before_last_server_got_dump, remote_ops_are_not_sent_before_last_server_got_dump ]. @@ -308,6 +288,7 @@ init_per_suite(Config) -> ]. end_per_suite(Config) -> + cets_test_setup:remove_cleanup_table(), Config. init_per_group(Group, Config) when Group == cets_seq_no_log; Group == cets_no_log -> @@ -2760,158 +2741,6 @@ node_down_history_is_updated_when_netsplit_happens(Config) -> cets:stop(Pid5) end. -disco_logs_nodeup(Config) -> - logger_debug_h:start(#{id => ?FUNCTION_NAME}), - #{disco := Disco, node2 := Node2} = setup_two_nodes_and_discovery(Config), - %% There could be several disco processes still running from the previous tests, - %% filter out logs by pid. 
- receive - {log, ?FUNCTION_NAME, #{ - level := warning, - meta := #{pid := Disco}, - msg := {report, #{what := nodeup, remote_node := Node2} = R} - }} = M -> - ?assert(is_integer(maps:get(connected_nodes, R)), M), - ?assert(is_integer(maps:get(time_since_startup_in_milliseconds, R)), M) - after 5000 -> - ct:fail(timeout) - end. - -disco_node_up_timestamp_is_remembered(Config) -> - #{disco := Disco, node2 := Node2} = setup_two_nodes_and_discovery(Config), - %% Check that nodeup is remembered - wait_for_disco_timestamp_to_appear(Disco, nodeup_timestamps, Node2). - -disco_logs_nodedown(Config) -> - logger_debug_h:start(#{id => ?FUNCTION_NAME}), - ok = net_kernel:monitor_nodes(true), - #{disco := Disco, node2 := Node2} = setup_two_nodes_and_discovery(Config, [wait, netsplit]), - receive_message({nodedown, Node2}), - receive - {log, ?FUNCTION_NAME, #{ - level := warning, - meta := #{pid := Disco}, - msg := {report, #{what := nodedown, remote_node := Node2} = R} - }} = M -> - ?assert(is_integer(maps:get(connected_nodes, R)), M), - ?assert(is_integer(maps:get(time_since_startup_in_milliseconds, R)), M), - ?assert(is_integer(maps:get(connected_millisecond_duration, R)), M) - after 5000 -> - ct:fail(timeout) - end. - -disco_node_down_timestamp_is_remembered(Config) -> - #{disco := Disco, node2 := Node2} = setup_two_nodes_and_discovery(Config, [wait, netsplit]), - %% Check that nodedown is remembered - wait_for_disco_timestamp_to_appear(Disco, nodedown_timestamps, Node2). - -disco_logs_nodeup_after_downtime(Config) -> - logger_debug_h:start(#{id => ?FUNCTION_NAME}), - #{disco := Disco, node2 := Node2} = setup_two_nodes_and_discovery(Config, [wait, netsplit]), - %% At this point cets_disco should reconnect nodes back automatically - %% after retry_type_to_timeout(after_nodedown) time. - %% We want to speed this up for tests though. - Disco ! check, - %% Receive a nodeup after the disconnect. 
- %% This nodeup should contain the downtime_millisecond_duration field - %% (initial nodeup should not contain this field). - receive - {log, ?FUNCTION_NAME, #{ - level := warning, - meta := #{pid := Disco}, - msg := - {report, - #{ - what := nodeup, - remote_node := Node2, - downtime_millisecond_duration := Downtime - } = R} - }} = M -> - ?assert(is_integer(maps:get(connected_nodes, R)), M), - ?assert(is_integer(Downtime), M) - after 5000 -> - ct:fail(timeout) - end. - -disco_logs_node_reconnects_after_downtime(Config) -> - logger_debug_h:start(#{id => ?FUNCTION_NAME}), - Setup = setup_two_nodes_and_discovery(Config, [wait, disco2]), - #{disco := Disco, node1 := Node1, node2 := Node2, peer2 := Peer2} = Setup, - %% Check that a start timestamp from a remote node is stored - Info = cets_discovery:system_info(Disco), - ?assertMatch(#{node_start_timestamps := #{Node2 := _}}, Info), - disconnect_node(Peer2, Node1), - receive - {log, ?FUNCTION_NAME, #{ - level := warning, - meta := #{pid := Disco}, - msg := - {report, #{ - what := node_reconnects, - start_time := StartTime, - remote_node := Node2 - }} - }} = M -> - ?assert(is_integer(StartTime), M) - after 5000 -> - ct:fail(timeout) - end. - -disco_nodeup_timestamp_is_updated_after_node_reconnects(Config) -> - logger_debug_h:start(#{id => ?FUNCTION_NAME}), - Setup = setup_two_nodes_and_discovery(Config, [wait, disco2]), - #{disco := Disco, node2 := Node2} = Setup, - OldTimestamp = cets_test_helper:get_disco_timestamp(Disco, nodeup_timestamps, Node2), - disconnect_node_by_name(Config, ct2), - wait_for_disco_timestamp_to_be_updated(Disco, nodeup_timestamps, Node2, OldTimestamp). 
- -disco_node_start_timestamp_is_updated_after_node_restarts(Config) -> - logger_debug_h:start(#{id => ?FUNCTION_NAME}), - Setup = setup_two_nodes_and_discovery(Config, [wait, disco2]), - #{disco := Disco, node2 := Node2} = Setup, - OldTimestamp = cets_test_helper:get_disco_timestamp(Disco, node_start_timestamps, Node2), - simulate_disco_restart(Setup), - wait_for_disco_timestamp_to_be_updated(Disco, node_start_timestamps, Node2, OldTimestamp). - -disco_late_pang_result_arrives_after_node_went_up(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - %% unavailable_nodes list contains nodes which have not responded to pings. - %% Ping is async though. - %% So, there could be the situation when the result of ping would be processed - %% after the node actually got connected. - meck:new(cets_ping, [passthrough]), - Me = self(), - meck:expect(cets_ping, send_ping_result, fun(Pid, Node, _PingResult) -> - %% Wait until Node is up - Cond = fun() -> lists:member(Node, nodes()) end, - cets_test_wait:wait_until(Cond, true), - Me ! send_ping_result_called, - %% Return pang to cets_discovery. - %% cets_join does not use send_ping_result function - %% and would receive pong and join correctly. - meck:passthrough([Pid, Node, pang]) - end), - try - %% setup_two_nodes_and_discovery would call disconnect_node/2 function - Setup = setup_two_nodes_and_discovery(Config, [wait, disco2]), - receive_message(send_ping_result_called), - #{disco_name := DiscoName} = Setup, - Status = cets_status:status(DiscoName), - %% Check that pang is ignored and unavailable_nodes list is empty. - ?assertMatch([], maps:get(unavailable_nodes, Status)), - ?assertMatch([Node1, Node2], maps:get(joined_nodes, Status)) - after - meck:unload() - end. - -disco_nodeup_triggers_check_and_get_nodes(Config) -> - Setup = setup_two_nodes_and_discovery(Config, [wait, notify_get_nodes]), - #{disco := Disco, node2 := Node2} = Setup, - flush_message(get_nodes), - Disco ! 
{nodeup, Node2}, - receive_message(get_nodes). - format_data_does_not_return_table_duplicates(Config) -> Res = cets_status:format_data(test_data_for_duplicate_missing_table_in_status(Config)), ?assertMatch(#{remote_unknown_tables := [], remote_nodes_with_missing_tables := []}, Res). @@ -3024,11 +2853,6 @@ bad_node_pid_binary() -> <<131, 88, 100, 0, 17, 98, 97, 100, 110, 111, 100, 101, 64, 108, 111, 99, 97, 108, 104, 111, 115, 116, 0, 0, 0, 90, 0, 0, 0, 0, 100, 206, 70, 92>>. -%% Fails if List has duplicates -assert_unique(List) -> - ?assertEqual([], List -- lists:usort(List)), - List. - send_join_start_back_and_wait_for_continue_joining() -> Me = self(), fun diff --git a/test/cets_disco_SUITE.erl b/test/cets_disco_SUITE.erl new file mode 100644 index 0000000..82023f3 --- /dev/null +++ b/test/cets_disco_SUITE.erl @@ -0,0 +1,257 @@ +-module(cets_disco_SUITE). +-include_lib("common_test/include/ct.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include_lib("kernel/include/logger.hrl"). + +-compile([export_all, nowarn_export_all]). + +-import(cets_test_setup, [ + setup_two_nodes_and_discovery/1, + setup_two_nodes_and_discovery/2, + simulate_disco_restart/1 +]). + +-import(cets_test_wait, [ + wait_for_disco_timestamp_to_appear/3, + wait_for_disco_timestamp_to_be_updated/4 +]). + +-import(cets_test_receive, [ + receive_message/1, + flush_message/1 +]). + +-import(cets_test_peer, [ + disconnect_node/2, + disconnect_node_by_name/2 +]). + +-import(cets_test_helper, [assert_unique/1]). + +all() -> + [ + {group, cets_seq}, + {group, cets_seq_no_log} + ]. + +groups() -> + %% Cases should have unique names, because we name CETS servers based on case names + [ + %% These tests actually simulate a netsplit on the distribution level. + %% Though, global's prevent_overlapping_partitions option starts kicking + %% all nodes from the cluster, so we have to be careful not to break other cases. + %% Setting prevent_overlapping_partitions=false on ct5 helps. 
+ {cets_seq, [sequence, {repeat_until_any_fail, 2}], assert_unique(seq_cases())}, + {cets_seq_no_log, [sequence, {repeat_until_any_fail, 2}], + assert_unique(cets_seq_no_log_cases())} + ]. + +seq_cases() -> + [ + disco_logs_nodeup, + disco_logs_nodedown, + disco_logs_nodeup_after_downtime, + disco_logs_node_reconnects_after_downtime, + disco_node_up_timestamp_is_remembered, + disco_node_down_timestamp_is_remembered, + disco_nodeup_timestamp_is_updated_after_node_reconnects, + disco_node_start_timestamp_is_updated_after_node_restarts, + disco_late_pang_result_arrives_after_node_went_up, + disco_nodeup_triggers_check_and_get_nodes + ]. + +cets_seq_no_log_cases() -> + [ + disco_node_up_timestamp_is_remembered, + disco_node_down_timestamp_is_remembered, + disco_nodeup_timestamp_is_updated_after_node_reconnects, + disco_node_start_timestamp_is_updated_after_node_restarts, + disco_late_pang_result_arrives_after_node_went_up + ]. + +init_per_suite(Config) -> + cets_test_setup:init_cleanup_table(), + cets_test_peer:start([ct2], Config). + +end_per_suite(Config) -> + cets_test_setup:remove_cleanup_table(), + cets_test_peer:stop(Config), + Config. + +init_per_group(Group, Config) when Group == cets_seq_no_log; Group == cets_no_log -> + [ok = logger:set_module_level(M, none) || M <- log_modules()], + Config; +init_per_group(_Group, Config) -> + Config. + +end_per_group(Group, Config) when Group == cets_seq_no_log; Group == cets_no_log -> + [ok = logger:unset_module_level(M) || M <- log_modules()], + Config; +end_per_group(_Group, Config) -> + Config. + +init_per_testcase(Name, Config) -> + init_per_testcase_generic(Name, Config). + +init_per_testcase_generic(Name, Config) -> + [{testcase, Name} | Config]. + +end_per_testcase(_, _Config) -> + cets_test_setup:wait_for_cleanup(), + ok. + +%% Modules that use a multiline LOG_ macro +log_modules() -> + [cets, cets_call, cets_long, cets_join, cets_discovery]. 
+ +disco_logs_nodeup(Config) -> + logger_debug_h:start(#{id => ?FUNCTION_NAME}), + #{disco := Disco, node2 := Node2} = setup_two_nodes_and_discovery(Config), + %% There could be several disco processes still running from the previous tests, + %% filter out logs by pid. + receive + {log, ?FUNCTION_NAME, #{ + level := warning, + meta := #{pid := Disco}, + msg := {report, #{what := nodeup, remote_node := Node2} = R} + }} = M -> + ?assert(is_integer(maps:get(connected_nodes, R)), M), + ?assert(is_integer(maps:get(time_since_startup_in_milliseconds, R)), M) + after 5000 -> + ct:fail(timeout) + end. + +disco_node_up_timestamp_is_remembered(Config) -> + #{disco := Disco, node2 := Node2} = setup_two_nodes_and_discovery(Config), + %% Check that nodeup is remembered + wait_for_disco_timestamp_to_appear(Disco, nodeup_timestamps, Node2). + +disco_logs_nodedown(Config) -> + logger_debug_h:start(#{id => ?FUNCTION_NAME}), + ok = net_kernel:monitor_nodes(true), + #{disco := Disco, node2 := Node2} = setup_two_nodes_and_discovery(Config, [wait, netsplit]), + receive_message({nodedown, Node2}), + receive + {log, ?FUNCTION_NAME, #{ + level := warning, + meta := #{pid := Disco}, + msg := {report, #{what := nodedown, remote_node := Node2} = R} + }} = M -> + ?assert(is_integer(maps:get(connected_nodes, R)), M), + ?assert(is_integer(maps:get(time_since_startup_in_milliseconds, R)), M), + ?assert(is_integer(maps:get(connected_millisecond_duration, R)), M) + after 5000 -> + ct:fail(timeout) + end. + +disco_node_down_timestamp_is_remembered(Config) -> + #{disco := Disco, node2 := Node2} = setup_two_nodes_and_discovery(Config, [wait, netsplit]), + %% Check that nodedown is remembered + wait_for_disco_timestamp_to_appear(Disco, nodedown_timestamps, Node2). 
+ +disco_logs_nodeup_after_downtime(Config) -> + logger_debug_h:start(#{id => ?FUNCTION_NAME}), + #{disco := Disco, node2 := Node2} = setup_two_nodes_and_discovery(Config, [wait, netsplit]), + %% At this point cets_disco should reconnect nodes back automatically + %% after retry_type_to_timeout(after_nodedown) time. + %% We want to speed this up for tests though. + Disco ! check, + %% Receive a nodeup after the disconnect. + %% This nodeup should contain the downtime_millisecond_duration field + %% (initial nodeup should not contain this field). + receive + {log, ?FUNCTION_NAME, #{ + level := warning, + meta := #{pid := Disco}, + msg := + {report, + #{ + what := nodeup, + remote_node := Node2, + downtime_millisecond_duration := Downtime + } = R} + }} = M -> + ?assert(is_integer(maps:get(connected_nodes, R)), M), + ?assert(is_integer(Downtime), M) + after 5000 -> + ct:fail(timeout) + end. + +disco_logs_node_reconnects_after_downtime(Config) -> + logger_debug_h:start(#{id => ?FUNCTION_NAME}), + Setup = setup_two_nodes_and_discovery(Config, [wait, disco2]), + #{disco := Disco, node1 := Node1, node2 := Node2, peer2 := Peer2} = Setup, + %% Check that a start timestamp from a remote node is stored + Info = cets_discovery:system_info(Disco), + ?assertMatch(#{node_start_timestamps := #{Node2 := _}}, Info), + disconnect_node(Peer2, Node1), + receive + {log, ?FUNCTION_NAME, #{ + level := warning, + meta := #{pid := Disco}, + msg := + {report, #{ + what := node_reconnects, + start_time := StartTime, + remote_node := Node2 + }} + }} = M -> + ?assert(is_integer(StartTime), M) + after 5000 -> + ct:fail(timeout) + end. 
+ +disco_nodeup_timestamp_is_updated_after_node_reconnects(Config) -> + logger_debug_h:start(#{id => ?FUNCTION_NAME}), + Setup = setup_two_nodes_and_discovery(Config, [wait, disco2]), + #{disco := Disco, node2 := Node2} = Setup, + OldTimestamp = cets_test_helper:get_disco_timestamp(Disco, nodeup_timestamps, Node2), + disconnect_node_by_name(Config, ct2), + wait_for_disco_timestamp_to_be_updated(Disco, nodeup_timestamps, Node2, OldTimestamp). + +disco_node_start_timestamp_is_updated_after_node_restarts(Config) -> + logger_debug_h:start(#{id => ?FUNCTION_NAME}), + Setup = setup_two_nodes_and_discovery(Config, [wait, disco2]), + #{disco := Disco, node2 := Node2} = Setup, + OldTimestamp = cets_test_helper:get_disco_timestamp(Disco, node_start_timestamps, Node2), + simulate_disco_restart(Setup), + wait_for_disco_timestamp_to_be_updated(Disco, node_start_timestamps, Node2, OldTimestamp). + +disco_late_pang_result_arrives_after_node_went_up(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + %% unavailable_nodes list contains nodes which have not responded to pings. + %% Ping is async though. + %% So, there could be the situation when the result of ping would be processed + %% after the node actually got connected. + meck:new(cets_ping, [passthrough]), + Me = self(), + meck:expect(cets_ping, send_ping_result, fun(Pid, Node, _PingResult) -> + %% Wait until Node is up + Cond = fun() -> lists:member(Node, nodes()) end, + cets_test_wait:wait_until(Cond, true), + Me ! send_ping_result_called, + %% Return pang to cets_discovery. + %% cets_join does not use send_ping_result function + %% and would receive pong and join correctly. 
+ meck:passthrough([Pid, Node, pang]) + end), + try + %% setup_two_nodes_and_discovery would call disconnect_node/2 function + Setup = setup_two_nodes_and_discovery(Config, [wait, disco2]), + receive_message(send_ping_result_called), + #{disco_name := DiscoName} = Setup, + Status = cets_status:status(DiscoName), + %% Check that pang is ignored and unavailable_nodes list is empty. + ?assertMatch([], maps:get(unavailable_nodes, Status)), + ?assertMatch([Node1, Node2], maps:get(joined_nodes, Status)) + after + meck:unload() + end. + +disco_nodeup_triggers_check_and_get_nodes(Config) -> + Setup = setup_two_nodes_and_discovery(Config, [wait, notify_get_nodes]), + #{disco := Disco, node2 := Node2} = Setup, + flush_message(get_nodes), + Disco ! {nodeup, Node2}, + receive_message(get_nodes). diff --git a/test/cets_test_helper.erl b/test/cets_test_helper.erl index fb524ec..144bf54 100644 --- a/test/cets_test_helper.erl +++ b/test/cets_test_helper.erl @@ -1,7 +1,17 @@ -module(cets_test_helper). --export([get_disco_timestamp/3]). +-include_lib("eunit/include/eunit.hrl"). + +-export([ + get_disco_timestamp/3, + assert_unique/1 +]). get_disco_timestamp(Disco, MapName, NodeKey) -> Info = cets_discovery:system_info(Disco), #{MapName := #{NodeKey := Timestamp}} = Info, Timestamp. + +%% Fails if List has duplicates +assert_unique(List) -> + ?assertEqual([], List -- lists:usort(List)), + List. diff --git a/test/cets_test_node.erl b/test/cets_test_node.erl deleted file mode 100644 index 1f7421b..0000000 --- a/test/cets_test_node.erl +++ /dev/null @@ -1,37 +0,0 @@ --module(cets_test_node). --export([ - block_node/2, - reconnect_node/2, - disconnect_node/2, - disconnect_node_by_name/2 -]). - --import(cets_test_rpc, [rpc/4]). 
- -%% Disconnect node until manually connected -block_node(Node, Peer) when is_atom(Node), is_pid(Peer) -> - rpc(Peer, erlang, set_cookie, [node(), invalid_cookie]), - disconnect_node(Peer, node()), - %% Wait till node() is notified about the disconnect - cets_test_wait:wait_until(fun() -> rpc(Peer, net_adm, ping, [node()]) end, pang), - cets_test_wait:wait_until(fun() -> rpc(node(), net_adm, ping, [Node]) end, pang). - -reconnect_node(Node, Peer) when is_atom(Node), is_pid(Peer) -> - rpc(Peer, erlang, set_cookie, [node(), erlang:get_cookie()]), - %% Very rarely it could return pang - cets_test_wait:wait_until(fun() -> rpc(Peer, net_adm, ping, [node()]) end, pong), - cets_test_wait:wait_until(fun() -> rpc(node(), net_adm, ping, [Node]) end, pong). - -disconnect_node(RPCNode, DisconnectNode) -> - rpc(RPCNode, erlang, disconnect_node, [DisconnectNode]). - -disconnect_node_by_name(Config, Id) -> - Peer = maps:get(Id, proplists:get_value(peers, Config)), - Node = maps:get(Id, proplists:get_value(nodes, Config)), - %% We could need to retry to disconnect, if the local node is currently trying to establish a connection - %% with Node2 (could be triggered by the previous tests) - F = fun() -> - disconnect_node(Peer, node()), - lists:member(Node, nodes()) - end, - cets_test_wait:wait_until(F, false). diff --git a/test/cets_test_peer.erl b/test/cets_test_peer.erl index 81f86f0..d821a67 100644 --- a/test/cets_test_peer.erl +++ b/test/cets_test_peer.erl @@ -1,10 +1,38 @@ -module(cets_test_peer). -export([ + start/2, + stop/1, start_node/1, node_to_peer/1 ]). + +-export([ + block_node/2, + reconnect_node/2, + disconnect_node/2, + disconnect_node_by_name/2 +]). + +-import(cets_test_rpc, [rpc/4]). + -include_lib("common_test/include/ct.hrl"). +start(Names, Config) -> + {Nodes, Peers} = lists:unzip([cets_test_peer:start_node(name(N)) || N <- Names]), + [ + {nodes, maps:from_list(lists:zip(Names, Nodes))}, + {peers, maps:from_list(lists:zip(Names, Peers))} + | Config + ]. 
+ +stop(Config) -> + Peers = proplists:get_value(peers, Config), + [peer:stop(Peer) || Peer <- maps:values(Peers)], + ok. + +name(Node) -> + list_to_atom(peer:random_name(atom_to_list(Node))). + start_node(Sname) -> {ok, Peer, Node} = ?CT_PEER(#{ name => Sname, connection => standard_io, args => extra_args(Sname) @@ -40,3 +68,31 @@ extra_args(X) when X == ct5; X == ct6; X == ct7 -> ["-kernel", "prevent_overlapping_partitions", "false"]; extra_args(_) -> "". + +%% Disconnect node until manually connected +block_node(Node, Peer) when is_atom(Node), is_pid(Peer) -> + rpc(Peer, erlang, set_cookie, [node(), invalid_cookie]), + disconnect_node(Peer, node()), + %% Wait till node() is notified about the disconnect + cets_test_wait:wait_until(fun() -> rpc(Peer, net_adm, ping, [node()]) end, pang), + cets_test_wait:wait_until(fun() -> rpc(node(), net_adm, ping, [Node]) end, pang). + +reconnect_node(Node, Peer) when is_atom(Node), is_pid(Peer) -> + rpc(Peer, erlang, set_cookie, [node(), erlang:get_cookie()]), + %% Very rarely it could return pang + cets_test_wait:wait_until(fun() -> rpc(Peer, net_adm, ping, [node()]) end, pong), + cets_test_wait:wait_until(fun() -> rpc(node(), net_adm, ping, [Node]) end, pong). + +disconnect_node(RPCNode, DisconnectNode) -> + rpc(RPCNode, erlang, disconnect_node, [DisconnectNode]). + +disconnect_node_by_name(Config, Id) -> + Peer = maps:get(Id, proplists:get_value(peers, Config)), + Node = maps:get(Id, proplists:get_value(nodes, Config)), + %% We could need to retry to disconnect, if the local node is currently trying to establish a connection + %% with Node2 (could be triggered by the previous tests) + F = fun() -> + disconnect_node(Peer, node()), + lists:member(Node, nodes()) + end, + cets_test_wait:wait_until(F, false). 
diff --git a/test/cets_test_setup.erl b/test/cets_test_setup.erl index c0b8a9f..3decf1d 100644 --- a/test/cets_test_setup.erl +++ b/test/cets_test_setup.erl @@ -6,6 +6,7 @@ -export([ init_cleanup_table/0, + remove_cleanup_table/0, wait_for_cleanup/0 ]). @@ -41,7 +42,7 @@ make_process/0 ]). --import(cets_test_node, [ +-import(cets_test_peer, [ disconnect_node/2, disconnect_node_by_name/2 ]). @@ -117,9 +118,14 @@ schedule_cleanup(Pid) -> init_cleanup_table() -> spawn(fun() -> ets:new(cleanup_table, [named_table, public, bag]), - timer:sleep(infinity) + receive + stop -> ok + end end). +remove_cleanup_table() -> + ets:info(cleanup_table, owner) ! stop. + %% schedule_cleanup is async, so this function is waiting for it to finish wait_for_cleanup() -> [ From 28b67f87751148a997f77344c3a2af28843cde07 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 14:27:00 +0100 Subject: [PATCH 09/30] Move small disco cases into disco suite --- test/cets_SUITE.erl | 301 ----------------------------------- test/cets_disco_SUITE.erl | 319 ++++++++++++++++++++++++++++++++++++++ test/cets_test_helper.erl | 10 ++ 3 files changed, 329 insertions(+), 301 deletions(-) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 86286e8..b1a8092 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -149,21 +149,6 @@ cases() -> test_multinode, test_multinode_remote_insert, node_list_is_correct, - test_multinode_auto_discovery, - test_disco_add_table, - test_disco_delete_table, - test_disco_delete_unknown_table, - test_disco_delete_table_twice, - test_disco_file_appears, - test_disco_handles_bad_node, - cets_discovery_fun_backend_works, - test_disco_add_table_twice, - test_disco_add_two_tables, - disco_retried_if_get_nodes_fail, - disco_uses_regular_retry_interval_in_the_regular_phase, - disco_uses_regular_retry_interval_in_the_regular_phase_after_node_down, - disco_uses_regular_retry_interval_in_the_regular_phase_after_expired_node_down, - 
disco_handles_node_up_and_down, status_available_nodes, status_available_nodes_do_not_contain_nodes_with_stopped_disco, status_unavailable_nodes, @@ -303,9 +288,6 @@ end_per_group(Group, Config) when Group == cets_seq_no_log; Group == cets_no_log end_per_group(_Group, Config) -> Config. -init_per_testcase(test_multinode_auto_discovery = Name, Config) -> - ct:make_priv_dir(), - init_per_testcase_generic(Name, Config); init_per_testcase(Name, Config) -> init_per_testcase_generic(Name, Config). @@ -1501,283 +1483,6 @@ node_list_is_correct(Config) -> [Node1, Node2, Node3] = other_nodes(Node4, Tab), ok. -test_multinode_auto_discovery(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - Tab = make_name(Config), - {ok, _Pid1} = start(Node1, Tab), - {ok, _Pid2} = start(Node2, Tab), - Dir = proplists:get_value(priv_dir, Config), - ct:pal("Dir ~p", [Dir]), - FileName = filename:join(Dir, "disco.txt"), - ok = file:write_file(FileName, io_lib:format("~s~n~s~n", [Node1, Node2])), - {ok, Disco} = cets_discovery:start_link(#{tables => [Tab], disco_file => FileName}), - %% Disco is async, so we have to wait for the final state - ok = wait_for_ready(Disco, 5000), - [Node2] = other_nodes(Node1, Tab), - [#{memory := _, nodes := [Node1, Node2], size := 0, table := Tab}] = - cets_discovery:info(Disco), - #{verify_ready := []} = - cets_discovery:system_info(Disco), - ok. 
- -test_disco_add_table(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - Tab = make_name(Config), - {ok, _Pid1} = start(Node1, Tab), - {ok, _Pid2} = start(Node2, Tab), - Dir = proplists:get_value(priv_dir, Config), - ct:pal("Dir ~p", [Dir]), - FileName = filename:join(Dir, "disco.txt"), - ok = file:write_file(FileName, io_lib:format("~s~n~s~n", [Node1, Node2])), - {ok, Disco} = cets_discovery:start_link(#{tables => [], disco_file => FileName}), - cets_discovery:add_table(Disco, Tab), - %% Disco is async, so we have to wait for the final state - ok = wait_for_ready(Disco, 5000), - [Node2] = other_nodes(Node1, Tab), - [#{memory := _, nodes := [Node1, Node2], size := 0, table := Tab}] = - cets_discovery:info(Disco), - ok. - -test_disco_delete_table(Config) -> - F = fun(State) -> {{ok, []}, State} end, - {ok, Disco} = cets_discovery:start_link(#{ - backend_module => cets_discovery_fun, get_nodes_fn => F - }), - Tab = make_name(Config), - cets_discovery:add_table(Disco, Tab), - #{tables := [Tab]} = cets_discovery:system_info(Disco), - cets_discovery:delete_table(Disco, Tab), - #{tables := []} = cets_discovery:system_info(Disco). - -test_disco_delete_unknown_table(Config) -> - F = fun(State) -> {{ok, []}, State} end, - {ok, Disco} = cets_discovery:start_link(#{ - backend_module => cets_discovery_fun, get_nodes_fn => F - }), - Tab = make_name(Config), - cets_discovery:delete_table(Disco, Tab), - #{tables := []} = cets_discovery:system_info(Disco). - -test_disco_delete_table_twice(Config) -> - F = fun(State) -> {{ok, []}, State} end, - {ok, Disco} = cets_discovery:start_link(#{ - backend_module => cets_discovery_fun, get_nodes_fn => F - }), - Tab = make_name(Config), - cets_discovery:add_table(Disco, Tab), - #{tables := [Tab]} = cets_discovery:system_info(Disco), - cets_discovery:delete_table(Disco, Tab), - cets_discovery:delete_table(Disco, Tab), - #{tables := []} = cets_discovery:system_info(Disco). 
- -test_disco_file_appears(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - Tab = make_name(Config), - {ok, _Pid1} = start(Node1, Tab), - {ok, _Pid2} = start(Node2, Tab), - Dir = proplists:get_value(priv_dir, Config), - ct:pal("Dir ~p", [Dir]), - FileName = filename:join(Dir, "disco3.txt"), - file:delete(FileName), - {ok, Disco} = cets_discovery:start_link(#{tables => [], disco_file => FileName}), - cets_discovery:add_table(Disco, Tab), - cets_test_wait:wait_until( - fun() -> maps:get(last_get_nodes_retry_type, cets_discovery:system_info(Disco)) end, - after_error - ), - ok = file:write_file(FileName, io_lib:format("~s~n~s~n", [Node1, Node2])), - %% Disco is async, so we have to wait for the final state - ok = wait_for_ready(Disco, 5000), - [Node2] = other_nodes(Node1, Tab), - [#{memory := _, nodes := [Node1, Node2], size := 0, table := Tab}] = - cets_discovery:info(Disco), - ok. - -test_disco_handles_bad_node(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - Tab = make_name(Config), - {ok, _Pid1} = start(Node1, Tab), - {ok, _Pid2} = start(Node2, Tab), - Dir = proplists:get_value(priv_dir, Config), - ct:pal("Dir ~p", [Dir]), - FileName = filename:join(Dir, "disco_badnode.txt"), - ok = file:write_file(FileName, io_lib:format("badnode@localhost~n~s~n~s~n", [Node1, Node2])), - {ok, Disco} = cets_discovery:start_link(#{tables => [], disco_file => FileName}), - cets_discovery:add_table(Disco, Tab), - %% Check that wait_for_ready would not block forever: - ok = wait_for_ready(Disco, 5000), - %% Check if the node sent pang: - #{unavailable_nodes := ['badnode@localhost']} = cets_discovery:system_info(Disco), - %% Check that other nodes are discovered fine - [#{memory := _, nodes := [Node1, Node2], size := 0, table := Tab}] = - cets_discovery:info(Disco). 
- -cets_discovery_fun_backend_works(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - Tab = make_name(Config), - {ok, _Pid1} = start(Node1, Tab), - {ok, _Pid2} = start(Node2, Tab), - F = fun(State) -> {{ok, [Node1, Node2]}, State} end, - {ok, Disco} = cets_discovery:start_link(#{ - backend_module => cets_discovery_fun, get_nodes_fn => F - }), - cets_discovery:add_table(Disco, Tab), - ok = wait_for_ready(Disco, 5000), - [#{memory := _, nodes := [Node1, Node2], size := 0, table := Tab}] = - cets_discovery:info(Disco). - -test_disco_add_table_twice(Config) -> - Dir = proplists:get_value(priv_dir, Config), - FileName = filename:join(Dir, "disco.txt"), - {ok, Disco} = cets_discovery:start_link(#{tables => [], disco_file => FileName}), - Tab = make_name(Config), - {ok, _Pid} = start_local(Tab), - cets_discovery:add_table(Disco, Tab), - cets_discovery:add_table(Disco, Tab), - %% Check that everything is fine - #{tables := [Tab]} = cets_discovery:system_info(Disco). - -test_disco_add_two_tables(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - Tab1 = make_name(Config, 1), - Tab2 = make_name(Config, 2), - {ok, _} = start(Node1, Tab1), - {ok, _} = start(Node2, Tab1), - {ok, _} = start(Node1, Tab2), - {ok, _} = start(Node2, Tab2), - Me = self(), - F = fun - (State = #{waited := true}) -> - Me ! called_after_waited, - {{ok, [Node1, Node2]}, State}; - (State) -> - wait_till_test_stage(Me, sent_both), - Me ! 
waited_for_sent_both, - {{ok, [Node1, Node2]}, State#{waited => true}} - end, - {ok, Disco} = cets_discovery:start_link(#{ - backend_module => cets_discovery_fun, get_nodes_fn => F - }), - %% Add two tables async - cets_discovery:add_table(Disco, Tab1), - %% After the first table, Disco would get blocked in get_nodes function (see wait_till_test_stage in F above) - cets_discovery:add_table(Disco, Tab2), - put(test_stage, sent_both), - %% Just ensure wait_till_test_stage function works: - wait_till_test_stage(Me, sent_both), - %% First check is done, the second check should be triggered asap - %% (i.e. because of should_retry_get_nodes=true set in state) - receive_message(waited_for_sent_both), - %% try_joining would be called after set_nodes, - %% but it is async, so wait until it is done: - cets_test_wait:wait_until( - fun() -> - maps:with( - [get_nodes_status, should_retry_get_nodes, join_status, should_retry_join], - cets_discovery:system_info(Disco) - ) - end, - #{ - get_nodes_status => not_running, - should_retry_get_nodes => false, - join_status => not_running, - should_retry_join => false - } - ), - [ - #{memory := _, nodes := [Node1, Node2], size := 0, table := Tab1}, - #{memory := _, nodes := [Node1, Node2], size := 0, table := Tab2} - ] = - cets_discovery:info(Disco), - ok. - -disco_retried_if_get_nodes_fail(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - Tab = make_name(Config), - {ok, _} = start(Node1, Tab), - {ok, _} = start(Node2, Tab), - F = fun(State) -> - {{error, simulate_error}, State} - end, - {ok, Disco} = cets_discovery:start_link(#{ - backend_module => cets_discovery_fun, get_nodes_fn => F - }), - cets_discovery:add_table(Disco, Tab), - cets_test_wait:wait_until( - fun() -> maps:get(last_get_nodes_retry_type, cets_discovery:system_info(Disco)) end, - after_error - ), - ok. 
- -disco_uses_regular_retry_interval_in_the_regular_phase(Config) -> - #{disco := Disco} = generic_disco_uses_regular_retry_interval_in_the_regular_phase(Config), - #{phase := regular, retry_type := regular} = cets_discovery:system_info(Disco). - -%% Similar to disco_uses_regular_retry_interval_in_the_regular_phase, but has nodedown -disco_uses_regular_retry_interval_in_the_regular_phase_after_node_down(Config) -> - SysInfo = generic_disco_uses_regular_retry_interval_in_the_regular_phase(Config), - #{disco := Disco, node2 := Node2} = SysInfo, - Disco ! {nodedown, Node2}, - #{phase := regular, retry_type := after_nodedown} = cets_discovery:system_info(Disco). - -%% Similar to disco_uses_regular_retry_interval_in_the_regular_phase_after_node_down, but we simulate long downtime -disco_uses_regular_retry_interval_in_the_regular_phase_after_expired_node_down(Config) -> - #{disco := Disco, node2 := Node2} = generic_disco_uses_regular_retry_interval_in_the_regular_phase( - Config - ), - Disco ! {nodedown, Node2}, - TestTimestamp = erlang:system_time(millisecond) - timer:seconds(1000), - set_nodedown_timestamp(Disco, Node2, TestTimestamp), - #{phase := regular, retry_type := regular} = cets_discovery:system_info(Disco). - -generic_disco_uses_regular_retry_interval_in_the_regular_phase(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - Tab = make_name(Config), - {ok, _} = start(Node1, Tab), - {ok, _} = start(Node2, Tab), - F = fun(State) -> {{ok, [Node1, Node2]}, State} end, - {ok, Disco} = cets_discovery:start_link(#{ - backend_module => cets_discovery_fun, get_nodes_fn => F - }), - Disco ! enter_regular_phase, - cets_discovery:add_table(Disco, Tab), - cets_test_wait:wait_until( - fun() -> maps:get(last_get_nodes_retry_type, cets_discovery:system_info(Disco)) end, regular - ), - #{disco => Disco, node2 => Node2}. 
- -disco_handles_node_up_and_down(Config) -> - BadNode = 'badnode@localhost', - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - Tab = make_name(Config), - {ok, _} = start(Node1, Tab), - {ok, _} = start(Node2, Tab), - F = fun(State) -> - {{ok, [Node1, Node2, BadNode]}, State} - end, - {ok, Disco} = cets_discovery:start_link(#{ - backend_module => cets_discovery_fun, get_nodes_fn => F - }), - cets_discovery:add_table(Disco, Tab), - %% get_nodes call is async, so wait for it - cets_test_wait:wait_until( - fun() -> length(maps:get(nodes, cets_discovery:system_info(Disco))) end, - 3 - ), - Disco ! {nodeup, BadNode}, - Disco ! {nodedown, BadNode}, - %% Check that wait_for_ready still works - ok = wait_for_ready(Disco, 5000). - status_available_nodes(Config) -> Node1 = node(), #{ct2 := Node2} = proplists:get_value(nodes, Config), @@ -2831,12 +2536,6 @@ set_other_servers(Pid, Servers) -> State#{other_servers := Servers} end). -%% Overwrites nodedown timestamp for the Node in the discovery server state -set_nodedown_timestamp(Disco, Node, NewTimestamp) -> - sys:replace_state(Disco, fun(#{nodedown_timestamps := Map} = State) -> - State#{nodedown_timestamps := maps:put(Node, NewTimestamp, Map)} - end). - stopped_pid() -> %% Get a pid for a stopped process {Pid, Mon} = spawn_monitor(fun() -> ok end), diff --git a/test/cets_disco_SUITE.erl b/test/cets_disco_SUITE.erl index 82023f3..583efee 100644 --- a/test/cets_disco_SUITE.erl +++ b/test/cets_disco_SUITE.erl @@ -5,6 +5,20 @@ -compile([export_all, nowarn_export_all]). +-import(cets_test_setup, [ + start/2, + start_local/1, + start_local/2, + make_name/1, + make_name/2, + disco_name/1 +]). + +-import(cets_test_wait, [ + wait_for_ready/2, + wait_till_test_stage/2 +]). + -import(cets_test_setup, [ setup_two_nodes_and_discovery/1, setup_two_nodes_and_discovery/2, @@ -28,8 +42,13 @@ -import(cets_test_helper, [assert_unique/1]). +-import(cets_test_rpc, [ + other_nodes/2 +]). 
+ all() -> [ + {group, cets}, {group, cets_seq}, {group, cets_seq_no_log} ]. @@ -37,6 +56,7 @@ all() -> groups() -> %% Cases should have unique names, because we name CETS servers based on case names [ + {cets, [parallel, {repeat_until_any_fail, 3}], assert_unique(cases())}, %% These tests actually simulate a netsplit on the distribution level. %% Though, global's prevent_overlapping_partitions option starts kicking %% all nodes from the cluster, so we have to be careful not to break other cases. @@ -46,6 +66,25 @@ groups() -> assert_unique(cets_seq_no_log_cases())} ]. +cases() -> + [ + test_multinode_auto_discovery, + test_disco_add_table, + test_disco_delete_table, + test_disco_delete_unknown_table, + test_disco_delete_table_twice, + test_disco_file_appears, + test_disco_handles_bad_node, + cets_discovery_fun_backend_works, + test_disco_add_table_twice, + test_disco_add_two_tables, + disco_retried_if_get_nodes_fail, + disco_uses_regular_retry_interval_in_the_regular_phase, + disco_uses_regular_retry_interval_in_the_regular_phase_after_node_down, + disco_uses_regular_retry_interval_in_the_regular_phase_after_expired_node_down, + disco_handles_node_up_and_down + ]. + seq_cases() -> [ disco_logs_nodeup, @@ -90,6 +129,9 @@ end_per_group(Group, Config) when Group == cets_seq_no_log; Group == cets_no_log end_per_group(_Group, Config) -> Config. +init_per_testcase(test_multinode_auto_discovery = Name, Config) -> + ct:make_priv_dir(), + init_per_testcase_generic(Name, Config); init_per_testcase(Name, Config) -> init_per_testcase_generic(Name, Config). @@ -104,6 +146,283 @@ end_per_testcase(_, _Config) -> log_modules() -> [cets, cets_call, cets_long, cets_join, cets_discovery]. 
+test_multinode_auto_discovery(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + Tab = make_name(Config), + {ok, _Pid1} = start(Node1, Tab), + {ok, _Pid2} = start(Node2, Tab), + Dir = proplists:get_value(priv_dir, Config), + ct:pal("Dir ~p", [Dir]), + FileName = filename:join(Dir, "disco.txt"), + ok = file:write_file(FileName, io_lib:format("~s~n~s~n", [Node1, Node2])), + {ok, Disco} = cets_discovery:start_link(#{tables => [Tab], disco_file => FileName}), + %% Disco is async, so we have to wait for the final state + ok = wait_for_ready(Disco, 5000), + [Node2] = other_nodes(Node1, Tab), + [#{memory := _, nodes := [Node1, Node2], size := 0, table := Tab}] = + cets_discovery:info(Disco), + #{verify_ready := []} = + cets_discovery:system_info(Disco), + ok. + +test_disco_add_table(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + Tab = make_name(Config), + {ok, _Pid1} = start(Node1, Tab), + {ok, _Pid2} = start(Node2, Tab), + Dir = proplists:get_value(priv_dir, Config), + ct:pal("Dir ~p", [Dir]), + FileName = filename:join(Dir, "disco.txt"), + ok = file:write_file(FileName, io_lib:format("~s~n~s~n", [Node1, Node2])), + {ok, Disco} = cets_discovery:start_link(#{tables => [], disco_file => FileName}), + cets_discovery:add_table(Disco, Tab), + %% Disco is async, so we have to wait for the final state + ok = wait_for_ready(Disco, 5000), + [Node2] = other_nodes(Node1, Tab), + [#{memory := _, nodes := [Node1, Node2], size := 0, table := Tab}] = + cets_discovery:info(Disco), + ok. + +test_disco_delete_table(Config) -> + F = fun(State) -> {{ok, []}, State} end, + {ok, Disco} = cets_discovery:start_link(#{ + backend_module => cets_discovery_fun, get_nodes_fn => F + }), + Tab = make_name(Config), + cets_discovery:add_table(Disco, Tab), + #{tables := [Tab]} = cets_discovery:system_info(Disco), + cets_discovery:delete_table(Disco, Tab), + #{tables := []} = cets_discovery:system_info(Disco). 
+ +test_disco_delete_unknown_table(Config) -> + F = fun(State) -> {{ok, []}, State} end, + {ok, Disco} = cets_discovery:start_link(#{ + backend_module => cets_discovery_fun, get_nodes_fn => F + }), + Tab = make_name(Config), + cets_discovery:delete_table(Disco, Tab), + #{tables := []} = cets_discovery:system_info(Disco). + +test_disco_delete_table_twice(Config) -> + F = fun(State) -> {{ok, []}, State} end, + {ok, Disco} = cets_discovery:start_link(#{ + backend_module => cets_discovery_fun, get_nodes_fn => F + }), + Tab = make_name(Config), + cets_discovery:add_table(Disco, Tab), + #{tables := [Tab]} = cets_discovery:system_info(Disco), + cets_discovery:delete_table(Disco, Tab), + cets_discovery:delete_table(Disco, Tab), + #{tables := []} = cets_discovery:system_info(Disco). + +test_disco_file_appears(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + Tab = make_name(Config), + {ok, _Pid1} = start(Node1, Tab), + {ok, _Pid2} = start(Node2, Tab), + Dir = proplists:get_value(priv_dir, Config), + ct:pal("Dir ~p", [Dir]), + FileName = filename:join(Dir, "disco3.txt"), + file:delete(FileName), + {ok, Disco} = cets_discovery:start_link(#{tables => [], disco_file => FileName}), + cets_discovery:add_table(Disco, Tab), + cets_test_wait:wait_until( + fun() -> maps:get(last_get_nodes_retry_type, cets_discovery:system_info(Disco)) end, + after_error + ), + ok = file:write_file(FileName, io_lib:format("~s~n~s~n", [Node1, Node2])), + %% Disco is async, so we have to wait for the final state + ok = wait_for_ready(Disco, 5000), + [Node2] = other_nodes(Node1, Tab), + [#{memory := _, nodes := [Node1, Node2], size := 0, table := Tab}] = + cets_discovery:info(Disco), + ok. 
+ +test_disco_handles_bad_node(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + Tab = make_name(Config), + {ok, _Pid1} = start(Node1, Tab), + {ok, _Pid2} = start(Node2, Tab), + Dir = proplists:get_value(priv_dir, Config), + ct:pal("Dir ~p", [Dir]), + FileName = filename:join(Dir, "disco_badnode.txt"), + ok = file:write_file(FileName, io_lib:format("badnode@localhost~n~s~n~s~n", [Node1, Node2])), + {ok, Disco} = cets_discovery:start_link(#{tables => [], disco_file => FileName}), + cets_discovery:add_table(Disco, Tab), + %% Check that wait_for_ready would not block forever: + ok = wait_for_ready(Disco, 5000), + %% Check if the node sent pang: + #{unavailable_nodes := ['badnode@localhost']} = cets_discovery:system_info(Disco), + %% Check that other nodes are discovered fine + [#{memory := _, nodes := [Node1, Node2], size := 0, table := Tab}] = + cets_discovery:info(Disco). + +cets_discovery_fun_backend_works(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + Tab = make_name(Config), + {ok, _Pid1} = start(Node1, Tab), + {ok, _Pid2} = start(Node2, Tab), + F = fun(State) -> {{ok, [Node1, Node2]}, State} end, + {ok, Disco} = cets_discovery:start_link(#{ + backend_module => cets_discovery_fun, get_nodes_fn => F + }), + cets_discovery:add_table(Disco, Tab), + ok = wait_for_ready(Disco, 5000), + [#{memory := _, nodes := [Node1, Node2], size := 0, table := Tab}] = + cets_discovery:info(Disco). + +test_disco_add_table_twice(Config) -> + Dir = proplists:get_value(priv_dir, Config), + FileName = filename:join(Dir, "disco.txt"), + {ok, Disco} = cets_discovery:start_link(#{tables => [], disco_file => FileName}), + Tab = make_name(Config), + {ok, _Pid} = start_local(Tab), + cets_discovery:add_table(Disco, Tab), + cets_discovery:add_table(Disco, Tab), + %% Check that everything is fine + #{tables := [Tab]} = cets_discovery:system_info(Disco). 
+ +test_disco_add_two_tables(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + Tab1 = make_name(Config, 1), + Tab2 = make_name(Config, 2), + {ok, _} = start(Node1, Tab1), + {ok, _} = start(Node2, Tab1), + {ok, _} = start(Node1, Tab2), + {ok, _} = start(Node2, Tab2), + Me = self(), + F = fun + (State = #{waited := true}) -> + Me ! called_after_waited, + {{ok, [Node1, Node2]}, State}; + (State) -> + wait_till_test_stage(Me, sent_both), + Me ! waited_for_sent_both, + {{ok, [Node1, Node2]}, State#{waited => true}} + end, + {ok, Disco} = cets_discovery:start_link(#{ + backend_module => cets_discovery_fun, get_nodes_fn => F + }), + %% Add two tables async + cets_discovery:add_table(Disco, Tab1), + %% After the first table, Disco would get blocked in get_nodes function (see wait_till_test_stage in F above) + cets_discovery:add_table(Disco, Tab2), + put(test_stage, sent_both), + %% Just ensure wait_till_test_stage function works: + wait_till_test_stage(Me, sent_both), + %% First check is done, the second check should be triggered asap + %% (i.e. because of should_retry_get_nodes=true set in state) + receive_message(waited_for_sent_both), + %% try_joining would be called after set_nodes, + %% but it is async, so wait until it is done: + cets_test_wait:wait_until( + fun() -> + maps:with( + [get_nodes_status, should_retry_get_nodes, join_status, should_retry_join], + cets_discovery:system_info(Disco) + ) + end, + #{ + get_nodes_status => not_running, + should_retry_get_nodes => false, + join_status => not_running, + should_retry_join => false + } + ), + [ + #{memory := _, nodes := [Node1, Node2], size := 0, table := Tab1}, + #{memory := _, nodes := [Node1, Node2], size := 0, table := Tab2} + ] = + cets_discovery:info(Disco), + ok. 
+ +disco_retried_if_get_nodes_fail(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + Tab = make_name(Config), + {ok, _} = start(Node1, Tab), + {ok, _} = start(Node2, Tab), + F = fun(State) -> + {{error, simulate_error}, State} + end, + {ok, Disco} = cets_discovery:start_link(#{ + backend_module => cets_discovery_fun, get_nodes_fn => F + }), + cets_discovery:add_table(Disco, Tab), + cets_test_wait:wait_until( + fun() -> maps:get(last_get_nodes_retry_type, cets_discovery:system_info(Disco)) end, + after_error + ), + ok. + +disco_uses_regular_retry_interval_in_the_regular_phase(Config) -> + #{disco := Disco} = generic_disco_uses_regular_retry_interval_in_the_regular_phase(Config), + #{phase := regular, retry_type := regular} = cets_discovery:system_info(Disco). + +%% Similar to disco_uses_regular_retry_interval_in_the_regular_phase, but has nodedown +disco_uses_regular_retry_interval_in_the_regular_phase_after_node_down(Config) -> + SysInfo = generic_disco_uses_regular_retry_interval_in_the_regular_phase(Config), + #{disco := Disco, node2 := Node2} = SysInfo, + Disco ! {nodedown, Node2}, + #{phase := regular, retry_type := after_nodedown} = cets_discovery:system_info(Disco). + +%% Similar to disco_uses_regular_retry_interval_in_the_regular_phase_after_node_down, but we simulate long downtime +disco_uses_regular_retry_interval_in_the_regular_phase_after_expired_node_down(Config) -> + #{disco := Disco, node2 := Node2} = generic_disco_uses_regular_retry_interval_in_the_regular_phase( + Config + ), + Disco ! {nodedown, Node2}, + TestTimestamp = erlang:system_time(millisecond) - timer:seconds(1000), + cets_test_helper:set_nodedown_timestamp(Disco, Node2, TestTimestamp), + #{phase := regular, retry_type := regular} = cets_discovery:system_info(Disco). 
+ +generic_disco_uses_regular_retry_interval_in_the_regular_phase(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + Tab = make_name(Config), + {ok, _} = start(Node1, Tab), + {ok, _} = start(Node2, Tab), + F = fun(State) -> {{ok, [Node1, Node2]}, State} end, + {ok, Disco} = cets_discovery:start_link(#{ + backend_module => cets_discovery_fun, get_nodes_fn => F + }), + Disco ! enter_regular_phase, + cets_discovery:add_table(Disco, Tab), + cets_test_wait:wait_until( + fun() -> maps:get(last_get_nodes_retry_type, cets_discovery:system_info(Disco)) end, regular + ), + #{disco => Disco, node2 => Node2}. + +disco_handles_node_up_and_down(Config) -> + BadNode = 'badnode@localhost', + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + Tab = make_name(Config), + {ok, _} = start(Node1, Tab), + {ok, _} = start(Node2, Tab), + F = fun(State) -> + {{ok, [Node1, Node2, BadNode]}, State} + end, + {ok, Disco} = cets_discovery:start_link(#{ + backend_module => cets_discovery_fun, get_nodes_fn => F + }), + cets_discovery:add_table(Disco, Tab), + %% get_nodes call is async, so wait for it + cets_test_wait:wait_until( + fun() -> length(maps:get(nodes, cets_discovery:system_info(Disco))) end, + 3 + ), + Disco ! {nodeup, BadNode}, + Disco ! {nodedown, BadNode}, + %% Check that wait_for_ready still works + ok = wait_for_ready(Disco, 5000). + disco_logs_nodeup(Config) -> logger_debug_h:start(#{id => ?FUNCTION_NAME}), #{disco := Disco, node2 := Node2} = setup_two_nodes_and_discovery(Config), diff --git a/test/cets_test_helper.erl b/test/cets_test_helper.erl index 144bf54..34137b8 100644 --- a/test/cets_test_helper.erl +++ b/test/cets_test_helper.erl @@ -6,6 +6,10 @@ assert_unique/1 ]). +-export([ + set_nodedown_timestamp/3 +]). 
+ get_disco_timestamp(Disco, MapName, NodeKey) -> Info = cets_discovery:system_info(Disco), #{MapName := #{NodeKey := Timestamp}} = Info, @@ -15,3 +19,9 @@ get_disco_timestamp(Disco, MapName, NodeKey) -> assert_unique(List) -> ?assertEqual([], List -- lists:usort(List)), List. + +%% Overwrites nodedown timestamp for the Node in the discovery server state +set_nodedown_timestamp(Disco, Node, NewTimestamp) -> + sys:replace_state(Disco, fun(#{nodedown_timestamps := Map} = State) -> + State#{nodedown_timestamps := maps:put(Node, NewTimestamp, Map)} + end). From 3baa846fba7424563cd55cd0bb7089b916dcfba5 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 14:55:14 +0100 Subject: [PATCH 10/30] Move cases into cets_status_SUITE --- test/cets_SUITE.erl | 289 +------------ test/cets_status_SUITE.erl | 400 ++++++++++++++++++ .../status_data.txt | 0 test/cets_test_helper.erl | 8 +- 4 files changed, 411 insertions(+), 286 deletions(-) create mode 100644 test/cets_status_SUITE.erl rename test/{cets_SUITE_data => cets_status_SUITE_data}/status_data.txt (100%) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index b1a8092..33961d0 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -68,7 +68,10 @@ assert_nothing_is_logged/2 ]). --import(cets_test_helper, [assert_unique/1]). +-import(cets_test_helper, [ + assert_unique/1, + set_other_servers/2 +]). 
all() -> [ @@ -149,17 +152,6 @@ cases() -> test_multinode, test_multinode_remote_insert, node_list_is_correct, - status_available_nodes, - status_available_nodes_do_not_contain_nodes_with_stopped_disco, - status_unavailable_nodes, - status_unavailable_nodes_is_subset_of_discovery_nodes, - status_joined_nodes, - status_discovery_works, - status_discovered_nodes, - status_remote_nodes_without_disco, - status_remote_nodes_with_unknown_tables, - status_remote_nodes_with_missing_nodes, - status_conflict_nodes, disco_wait_for_get_nodes_works, disco_wait_for_get_nodes_blocks_and_returns, disco_wait_for_get_nodes_when_get_nodes_needs_to_be_retried, @@ -209,7 +201,6 @@ cases() -> send_leader_op_throws_noproc, pinfo_returns_value, pinfo_returns_undefined, - format_data_does_not_return_table_duplicates, cets_ping_non_existing_node, cets_ping_net_family, unexpected_nodedown_is_ignored_by_disco, @@ -1483,253 +1474,6 @@ node_list_is_correct(Config) -> [Node1, Node2, Node3] = other_nodes(Node4, Tab), ok. -status_available_nodes(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - F = fun(State) -> - {{ok, []}, State} - end, - DiscoName = disco_name(Config), - start_disco(Node1, #{name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F}), - start_disco(Node2, #{name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F}), - ?assertMatch(#{available_nodes := [Node1, Node2]}, cets_status:status(DiscoName)). - -status_available_nodes_do_not_contain_nodes_with_stopped_disco(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - F = fun(State) -> - {{ok, [Node1, Node2]}, State} - end, - DiscoName = disco_name(Config), - start_disco(Node1, #{name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F}), - %% Disco not running - ?assertMatch(#{available_nodes := [Node1]}, cets_status:status(DiscoName)). 
- -status_unavailable_nodes(Config) -> - Node1 = node(), - F = fun(State) -> - {{ok, [Node1, 'badnode@localhost']}, State} - end, - DiscoName = disco_name(Config), - Disco = start_disco(Node1, #{ - name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F - }), - %% Disco needs at least one table to start calling get_nodes function - Tab = make_name(Config), - {ok, _} = start(Node1, Tab), - cets_discovery:add_table(Disco, Tab), - ok = wait_for_ready(DiscoName, 5000), - ?assertMatch(#{unavailable_nodes := ['badnode@localhost']}, cets_status:status(DiscoName)). - -status_unavailable_nodes_is_subset_of_discovery_nodes(Config) -> - Node1 = node(), - Self = self(), - GetFn1 = fun(State) -> {{ok, [Node1, 'badnode@localhost']}, State} end, - GetFn2 = fun(State) -> - Self ! get_fn2_called, - {{ok, [Node1]}, State} - end, - %% Setup meck - BackendModule = make_name(Config, disco_backend), - meck:new(BackendModule, [non_strict]), - meck:expect(BackendModule, init, fun(_Opts) -> undefined end), - meck:expect(BackendModule, get_nodes, GetFn1), - DiscoName = disco_name(Config), - Disco = start_disco(Node1, #{ - name => DiscoName, backend_module => BackendModule - }), - %% Disco needs at least one table to start calling get_nodes function - Tab = make_name(Config), - {ok, _} = start(Node1, Tab), - cets_discovery:add_table(Disco, Tab), - ok = wait_for_ready(DiscoName, 5000), - ?assertMatch(#{unavailable_nodes := ['badnode@localhost']}, cets_status:status(DiscoName)), - %% Remove badnode from disco - meck:expect(BackendModule, get_nodes, GetFn2), - %% Force check. - Disco ! check, - receive_message(get_fn2_called), - %% The unavailable_nodes list is updated - CondF = fun() -> maps:get(unavailable_nodes, cets_status:status(DiscoName)) end, - cets_test_wait:wait_until(CondF, []). 
- -status_joined_nodes(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - F = fun(State) -> - {{ok, [Node1, Node2]}, State} - end, - DiscoName = disco_name(Config), - DiscoOpts = #{ - name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F - }, - Disco1 = start_disco(Node1, DiscoOpts), - Disco2 = start_disco(Node2, DiscoOpts), - Tab = make_name(Config), - {ok, _} = start(Node1, Tab), - {ok, _} = start(Node2, Tab), - %% Add table using pids (i.e. no need to do RPCs here) - cets_discovery:add_table(Disco1, Tab), - cets_discovery:add_table(Disco2, Tab), - ok = wait_for_ready(DiscoName, 5000), - cets_test_wait:wait_until(fun() -> maps:get(joined_nodes, cets_status:status(DiscoName)) end, [ - Node1, Node2 - ]). - -status_discovery_works(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - F = fun(State) -> - {{ok, [Node1, Node2]}, State} - end, - DiscoName = disco_name(Config), - DiscoOpts = #{ - name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F - }, - Disco1 = start_disco(Node1, DiscoOpts), - Disco2 = start_disco(Node2, DiscoOpts), - Tab = make_name(Config), - {ok, _} = start(Node1, Tab), - {ok, _} = start(Node2, Tab), - %% Add table using pids (i.e. no need to do RPCs here) - cets_discovery:add_table(Disco1, Tab), - cets_discovery:add_table(Disco2, Tab), - ok = wait_for_ready(DiscoName, 5000), - ?assertMatch(#{discovery_works := true}, cets_status:status(DiscoName)). - -status_discovered_nodes(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - F = fun(State) -> - {{ok, [Node1, Node2]}, State} - end, - DiscoName = disco_name(Config), - Disco = start_disco(Node1, #{ - name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F - }), - Tab = make_name(Config), - {ok, _} = start(Node1, Tab), - {ok, _} = start(Node2, Tab), - %% Add table using pids (i.e. 
no need to do RPCs here) - cets_discovery:add_table(Disco, Tab), - ok = wait_for_ready(DiscoName, 5000), - ?assertMatch(#{discovered_nodes := [Node1, Node2]}, cets_status:status(DiscoName)). - -status_remote_nodes_without_disco(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - F = fun(State) -> - {{ok, [Node1, Node2]}, State} - end, - DiscoName = disco_name(Config), - Disco = start_disco(Node1, #{ - name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F - }), - Tab = make_name(Config), - {ok, _} = start(Node1, Tab), - cets_discovery:add_table(Disco, Tab), - ok = wait_for_ready(DiscoName, 5000), - ?assertMatch(#{remote_nodes_without_disco := [Node2]}, cets_status:status(DiscoName)). - -status_remote_nodes_with_unknown_tables(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - F = fun(State) -> - {{ok, [Node1, Node2]}, State} - end, - DiscoName = disco_name(Config), - DiscoOpts = #{ - name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F - }, - Disco1 = start_disco(Node1, DiscoOpts), - Disco2 = start_disco(Node2, DiscoOpts), - Tab1 = make_name(Config, 1), - Tab2 = make_name(Config, 2), - %% Node1 does not have Tab2 - {ok, _} = start(Node1, Tab2), - {ok, _} = start(Node2, Tab1), - {ok, _} = start(Node2, Tab2), - %% Add table using pids (i.e. no need to do RPCs here) - cets_discovery:add_table(Disco1, Tab1), - cets_discovery:add_table(Disco2, Tab1), - cets_discovery:add_table(Disco2, Tab2), - ok = wait_for_ready(DiscoName, 5000), - cets_test_wait:wait_until( - fun() -> maps:get(remote_nodes_with_unknown_tables, cets_status:status(DiscoName)) end, [ - Node2 - ] - ), - cets_test_wait:wait_until( - fun() -> maps:get(remote_unknown_tables, cets_status:status(DiscoName)) end, [ - Tab2 - ] - ). 
- -status_remote_nodes_with_missing_nodes(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - F = fun(State) -> - {{ok, [Node1, Node2]}, State} - end, - DiscoName = disco_name(Config), - DiscoOpts = #{ - name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F - }, - Disco1 = start_disco(Node1, DiscoOpts), - Disco2 = start_disco(Node2, DiscoOpts), - Tab1 = make_name(Config, 1), - Tab2 = make_name(Config, 2), - %% Node2 does not have Tab2 - {ok, _} = start(Node1, Tab1), - {ok, _} = start(Node1, Tab2), - {ok, _} = start(Node2, Tab1), - cets_discovery:add_table(Disco1, Tab1), - cets_discovery:add_table(Disco1, Tab2), - cets_discovery:add_table(Disco2, Tab1), - ok = wait_for_ready(DiscoName, 5000), - cets_test_wait:wait_until( - fun() -> maps:get(remote_nodes_with_missing_tables, cets_status:status(DiscoName)) end, [ - Node2 - ] - ), - cets_test_wait:wait_until( - fun() -> maps:get(remote_missing_tables, cets_status:status(DiscoName)) end, [ - Tab2 - ] - ). 
- -status_conflict_nodes(Config) -> - Node1 = node(), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - F = fun(State) -> - {{ok, [Node1, Node2]}, State} - end, - DiscoName = disco_name(Config), - DiscoOpts = #{ - name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F - }, - Disco1 = start_disco(Node1, DiscoOpts), - Disco2 = start_disco(Node2, DiscoOpts), - Tab1 = make_name(Config, 1), - Tab2 = make_name(Config, 2), - {ok, _} = start(Node1, Tab1), - {ok, _} = start(Node1, Tab2), - {ok, _} = start(Node2, Tab1), - {ok, Pid22} = start(Node2, Tab2), - cets_discovery:add_table(Disco1, Tab1), - cets_discovery:add_table(Disco1, Tab2), - cets_discovery:add_table(Disco2, Tab1), - cets_discovery:add_table(Disco2, Tab2), - - ok = wait_for_ready(DiscoName, 5000), - set_other_servers(Pid22, []), - cets_test_wait:wait_until( - fun() -> maps:get(conflict_nodes, cets_status:status(DiscoName)) end, [Node2] - ), - cets_test_wait:wait_until( - fun() -> maps:get(conflict_tables, cets_status:status(DiscoName)) end, [Tab2] - ). - disco_wait_for_get_nodes_works(_Config) -> F = fun(State) -> {{ok, []}, State} end, {ok, Disco} = cets_discovery:start_link(#{ @@ -2446,10 +2190,6 @@ node_down_history_is_updated_when_netsplit_happens(Config) -> cets:stop(Pid5) end. -format_data_does_not_return_table_duplicates(Config) -> - Res = cets_status:format_data(test_data_for_duplicate_missing_table_in_status(Config)), - ?assertMatch(#{remote_unknown_tables := [], remote_nodes_with_missing_tables := []}, Res). - cets_ping_non_existing_node(_Config) -> pang = cets_ping:ping('mongooseim@non_existing_host'). @@ -2531,11 +2271,6 @@ start_link_local(Name, Opts) -> set_join_ref(Pid, JoinRef) -> sys:replace_state(Pid, fun(#{join_ref := _} = State) -> State#{join_ref := JoinRef} end). -set_other_servers(Pid, Servers) -> - sys:replace_state(Pid, fun(#{other_servers := _} = State) -> - State#{other_servers := Servers} - end). 
- stopped_pid() -> %% Get a pid for a stopped process {Pid, Mon} = spawn_monitor(fun() -> ok end), @@ -2565,22 +2300,6 @@ send_join_start_back_and_wait_for_continue_joining() -> ok end. -%% Gathered after Helm update -%% with cets_status:gather_data(mongoose_cets_discovery). -test_data_for_duplicate_missing_table_in_status(Config) -> - %% Create atoms in non sorted order - %% maps:keys returns keys in the atom-creation order (and not sorted). - %% Also, compiler is smart and would optimize list_to_atom("literal_string"), - %% so we do a module call to disable this optimization. - _ = list_to_atom(?MODULE:return_same("cets_external_component")), - _ = list_to_atom(?MODULE:return_same("cets_bosh")), - Name = filename:join(proplists:get_value(data_dir, Config), "status_data.txt"), - {ok, [Term]} = file:consult(Name), - Term. - -return_same(X) -> - X. - not_leader(Leader, Other, Leader) -> Other; not_leader(Other, Leader, Leader) -> diff --git a/test/cets_status_SUITE.erl b/test/cets_status_SUITE.erl new file mode 100644 index 0000000..fd6ccd5 --- /dev/null +++ b/test/cets_status_SUITE.erl @@ -0,0 +1,400 @@ +-module(cets_status_SUITE). +-include_lib("common_test/include/ct.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include_lib("kernel/include/logger.hrl"). + +-compile([export_all, nowarn_export_all]). + +-import(cets_test_setup, [ + start/2, + start_local/1, + start_local/2, + start_disco/2, + make_name/1, + make_name/2, + disco_name/1 +]). + +-import(cets_test_wait, [ + wait_for_ready/2, + wait_till_test_stage/2 +]). + +-import(cets_test_setup, [ + setup_two_nodes_and_discovery/1, + setup_two_nodes_and_discovery/2, + simulate_disco_restart/1 +]). + +-import(cets_test_wait, [ + wait_for_disco_timestamp_to_appear/3, + wait_for_disco_timestamp_to_be_updated/4 +]). + +-import(cets_test_receive, [ + receive_message/1, + flush_message/1 +]). + +-import(cets_test_peer, [ + disconnect_node/2, + disconnect_node_by_name/2 +]). 
+ +-import(cets_test_helper, [ + assert_unique/1, + set_other_servers/2 +]). + +-import(cets_test_rpc, [ + other_nodes/2 +]). + +all() -> + [ + {group, cets} + % {group, cets_seq}, + % {group, cets_seq_no_log} + ]. + +groups() -> + %% Cases should have unique names, because we name CETS servers based on case names + [ + {cets, [parallel, {repeat_until_any_fail, 3}], assert_unique(cases())}, + %% These tests actually simulate a netsplit on the distribution level. + %% Though, global's prevent_overlapping_partitions option starts kicking + %% all nodes from the cluster, so we have to be careful not to break other cases. + %% Setting prevent_overlapping_partitions=false on ct5 helps. + {cets_seq, [sequence, {repeat_until_any_fail, 2}], assert_unique(seq_cases())}, + {cets_seq_no_log, [sequence, {repeat_until_any_fail, 2}], + assert_unique(cets_seq_no_log_cases())} + ]. + +cases() -> + [ + status_available_nodes, + status_available_nodes_do_not_contain_nodes_with_stopped_disco, + status_unavailable_nodes, + status_unavailable_nodes_is_subset_of_discovery_nodes, + status_joined_nodes, + status_discovery_works, + status_discovered_nodes, + status_remote_nodes_without_disco, + status_remote_nodes_with_unknown_tables, + status_remote_nodes_with_missing_nodes, + status_conflict_nodes, + format_data_does_not_return_table_duplicates + ]. + +seq_cases() -> + []. + +cets_seq_no_log_cases() -> + []. + +init_per_suite(Config) -> + cets_test_setup:init_cleanup_table(), + cets_test_peer:start([ct2], Config). + +end_per_suite(Config) -> + cets_test_setup:remove_cleanup_table(), + cets_test_peer:stop(Config), + Config. + +init_per_group(Group, Config) when Group == cets_seq_no_log; Group == cets_no_log -> + [ok = logger:set_module_level(M, none) || M <- log_modules()], + Config; +init_per_group(_Group, Config) -> + Config. 
+ +end_per_group(Group, Config) when Group == cets_seq_no_log; Group == cets_no_log -> + [ok = logger:unset_module_level(M) || M <- log_modules()], + Config; +end_per_group(_Group, Config) -> + Config. + +init_per_testcase(test_multinode_auto_discovery = Name, Config) -> + ct:make_priv_dir(), + init_per_testcase_generic(Name, Config); +init_per_testcase(Name, Config) -> + init_per_testcase_generic(Name, Config). + +init_per_testcase_generic(Name, Config) -> + [{testcase, Name} | Config]. + +end_per_testcase(_, _Config) -> + cets_test_setup:wait_for_cleanup(), + ok. + +%% Modules that use a multiline LOG_ macro +log_modules() -> + [cets, cets_call, cets_long, cets_join, cets_discovery]. + +status_available_nodes(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + F = fun(State) -> + {{ok, []}, State} + end, + DiscoName = disco_name(Config), + start_disco(Node1, #{name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F}), + start_disco(Node2, #{name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F}), + ?assertMatch(#{available_nodes := [Node1, Node2]}, cets_status:status(DiscoName)). + +status_available_nodes_do_not_contain_nodes_with_stopped_disco(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + F = fun(State) -> + {{ok, [Node1, Node2]}, State} + end, + DiscoName = disco_name(Config), + start_disco(Node1, #{name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F}), + %% Disco not running + ?assertMatch(#{available_nodes := [Node1]}, cets_status:status(DiscoName)). 
+ +status_unavailable_nodes(Config) -> + Node1 = node(), + F = fun(State) -> + {{ok, [Node1, 'badnode@localhost']}, State} + end, + DiscoName = disco_name(Config), + Disco = start_disco(Node1, #{ + name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F + }), + %% Disco needs at least one table to start calling get_nodes function + Tab = make_name(Config), + {ok, _} = start(Node1, Tab), + cets_discovery:add_table(Disco, Tab), + ok = wait_for_ready(DiscoName, 5000), + ?assertMatch(#{unavailable_nodes := ['badnode@localhost']}, cets_status:status(DiscoName)). + +status_unavailable_nodes_is_subset_of_discovery_nodes(Config) -> + Node1 = node(), + Self = self(), + GetFn1 = fun(State) -> {{ok, [Node1, 'badnode@localhost']}, State} end, + GetFn2 = fun(State) -> + Self ! get_fn2_called, + {{ok, [Node1]}, State} + end, + %% Setup meck + BackendModule = make_name(Config, disco_backend), + meck:new(BackendModule, [non_strict]), + meck:expect(BackendModule, init, fun(_Opts) -> undefined end), + meck:expect(BackendModule, get_nodes, GetFn1), + DiscoName = disco_name(Config), + Disco = start_disco(Node1, #{ + name => DiscoName, backend_module => BackendModule + }), + %% Disco needs at least one table to start calling get_nodes function + Tab = make_name(Config), + {ok, _} = start(Node1, Tab), + cets_discovery:add_table(Disco, Tab), + ok = wait_for_ready(DiscoName, 5000), + ?assertMatch(#{unavailable_nodes := ['badnode@localhost']}, cets_status:status(DiscoName)), + %% Remove badnode from disco + meck:expect(BackendModule, get_nodes, GetFn2), + %% Force check. + Disco ! check, + receive_message(get_fn2_called), + %% The unavailable_nodes list is updated + CondF = fun() -> maps:get(unavailable_nodes, cets_status:status(DiscoName)) end, + cets_test_wait:wait_until(CondF, []). 
+ +status_joined_nodes(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + F = fun(State) -> + {{ok, [Node1, Node2]}, State} + end, + DiscoName = disco_name(Config), + DiscoOpts = #{ + name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F + }, + Disco1 = start_disco(Node1, DiscoOpts), + Disco2 = start_disco(Node2, DiscoOpts), + Tab = make_name(Config), + {ok, _} = start(Node1, Tab), + {ok, _} = start(Node2, Tab), + %% Add table using pids (i.e. no need to do RPCs here) + cets_discovery:add_table(Disco1, Tab), + cets_discovery:add_table(Disco2, Tab), + ok = wait_for_ready(DiscoName, 5000), + cets_test_wait:wait_until(fun() -> maps:get(joined_nodes, cets_status:status(DiscoName)) end, [ + Node1, Node2 + ]). + +status_discovery_works(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + F = fun(State) -> + {{ok, [Node1, Node2]}, State} + end, + DiscoName = disco_name(Config), + DiscoOpts = #{ + name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F + }, + Disco1 = start_disco(Node1, DiscoOpts), + Disco2 = start_disco(Node2, DiscoOpts), + Tab = make_name(Config), + {ok, _} = start(Node1, Tab), + {ok, _} = start(Node2, Tab), + %% Add table using pids (i.e. no need to do RPCs here) + cets_discovery:add_table(Disco1, Tab), + cets_discovery:add_table(Disco2, Tab), + ok = wait_for_ready(DiscoName, 5000), + ?assertMatch(#{discovery_works := true}, cets_status:status(DiscoName)). + +status_discovered_nodes(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + F = fun(State) -> + {{ok, [Node1, Node2]}, State} + end, + DiscoName = disco_name(Config), + Disco = start_disco(Node1, #{ + name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F + }), + Tab = make_name(Config), + {ok, _} = start(Node1, Tab), + {ok, _} = start(Node2, Tab), + %% Add table using pids (i.e. 
no need to do RPCs here) + cets_discovery:add_table(Disco, Tab), + ok = wait_for_ready(DiscoName, 5000), + ?assertMatch(#{discovered_nodes := [Node1, Node2]}, cets_status:status(DiscoName)). + +status_remote_nodes_without_disco(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + F = fun(State) -> + {{ok, [Node1, Node2]}, State} + end, + DiscoName = disco_name(Config), + Disco = start_disco(Node1, #{ + name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F + }), + Tab = make_name(Config), + {ok, _} = start(Node1, Tab), + cets_discovery:add_table(Disco, Tab), + ok = wait_for_ready(DiscoName, 5000), + ?assertMatch(#{remote_nodes_without_disco := [Node2]}, cets_status:status(DiscoName)). + +status_remote_nodes_with_unknown_tables(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + F = fun(State) -> + {{ok, [Node1, Node2]}, State} + end, + DiscoName = disco_name(Config), + DiscoOpts = #{ + name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F + }, + Disco1 = start_disco(Node1, DiscoOpts), + Disco2 = start_disco(Node2, DiscoOpts), + Tab1 = make_name(Config, 1), + Tab2 = make_name(Config, 2), + %% Node1 does not have Tab2 + {ok, _} = start(Node1, Tab2), + {ok, _} = start(Node2, Tab1), + {ok, _} = start(Node2, Tab2), + %% Add table using pids (i.e. no need to do RPCs here) + cets_discovery:add_table(Disco1, Tab1), + cets_discovery:add_table(Disco2, Tab1), + cets_discovery:add_table(Disco2, Tab2), + ok = wait_for_ready(DiscoName, 5000), + cets_test_wait:wait_until( + fun() -> maps:get(remote_nodes_with_unknown_tables, cets_status:status(DiscoName)) end, [ + Node2 + ] + ), + cets_test_wait:wait_until( + fun() -> maps:get(remote_unknown_tables, cets_status:status(DiscoName)) end, [ + Tab2 + ] + ). 
+ +status_remote_nodes_with_missing_nodes(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + F = fun(State) -> + {{ok, [Node1, Node2]}, State} + end, + DiscoName = disco_name(Config), + DiscoOpts = #{ + name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F + }, + Disco1 = start_disco(Node1, DiscoOpts), + Disco2 = start_disco(Node2, DiscoOpts), + Tab1 = make_name(Config, 1), + Tab2 = make_name(Config, 2), + %% Node2 does not have Tab2 + {ok, _} = start(Node1, Tab1), + {ok, _} = start(Node1, Tab2), + {ok, _} = start(Node2, Tab1), + cets_discovery:add_table(Disco1, Tab1), + cets_discovery:add_table(Disco1, Tab2), + cets_discovery:add_table(Disco2, Tab1), + ok = wait_for_ready(DiscoName, 5000), + cets_test_wait:wait_until( + fun() -> maps:get(remote_nodes_with_missing_tables, cets_status:status(DiscoName)) end, [ + Node2 + ] + ), + cets_test_wait:wait_until( + fun() -> maps:get(remote_missing_tables, cets_status:status(DiscoName)) end, [ + Tab2 + ] + ). 
+ +status_conflict_nodes(Config) -> + Node1 = node(), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + F = fun(State) -> + {{ok, [Node1, Node2]}, State} + end, + DiscoName = disco_name(Config), + DiscoOpts = #{ + name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F + }, + Disco1 = start_disco(Node1, DiscoOpts), + Disco2 = start_disco(Node2, DiscoOpts), + Tab1 = make_name(Config, 1), + Tab2 = make_name(Config, 2), + {ok, _} = start(Node1, Tab1), + {ok, _} = start(Node1, Tab2), + {ok, _} = start(Node2, Tab1), + {ok, Pid22} = start(Node2, Tab2), + cets_discovery:add_table(Disco1, Tab1), + cets_discovery:add_table(Disco1, Tab2), + cets_discovery:add_table(Disco2, Tab1), + cets_discovery:add_table(Disco2, Tab2), + + ok = wait_for_ready(DiscoName, 5000), + set_other_servers(Pid22, []), + cets_test_wait:wait_until( + fun() -> maps:get(conflict_nodes, cets_status:status(DiscoName)) end, [Node2] + ), + cets_test_wait:wait_until( + fun() -> maps:get(conflict_tables, cets_status:status(DiscoName)) end, [Tab2] + ). + +format_data_does_not_return_table_duplicates(Config) -> + Res = cets_status:format_data(test_data_for_duplicate_missing_table_in_status(Config)), + ?assertMatch(#{remote_unknown_tables := [], remote_nodes_with_missing_tables := []}, Res). + +%% Helpers + +%% Gathered after Helm update +%% with cets_status:gather_data(mongoose_cets_discovery). +test_data_for_duplicate_missing_table_in_status(Config) -> + %% Create atoms in non sorted order + %% maps:keys returns keys in the atom-creation order (and not sorted). + %% Also, compiler is smart and would optimize list_to_atom("literal_string"), + %% so we do a module call to disable this optimization. + _ = list_to_atom(?MODULE:return_same("cets_external_component")), + _ = list_to_atom(?MODULE:return_same("cets_bosh")), + Name = filename:join(proplists:get_value(data_dir, Config), "status_data.txt"), + {ok, [Term]} = file:consult(Name), + Term. + +return_same(X) -> + X. 
diff --git a/test/cets_SUITE_data/status_data.txt b/test/cets_status_SUITE_data/status_data.txt similarity index 100% rename from test/cets_SUITE_data/status_data.txt rename to test/cets_status_SUITE_data/status_data.txt diff --git a/test/cets_test_helper.erl b/test/cets_test_helper.erl index 34137b8..fedc8d4 100644 --- a/test/cets_test_helper.erl +++ b/test/cets_test_helper.erl @@ -7,7 +7,8 @@ ]). -export([ - set_nodedown_timestamp/3 + set_nodedown_timestamp/3, + set_other_servers/2 ]). get_disco_timestamp(Disco, MapName, NodeKey) -> @@ -25,3 +26,8 @@ set_nodedown_timestamp(Disco, Node, NewTimestamp) -> sys:replace_state(Disco, fun(#{nodedown_timestamps := Map} = State) -> State#{nodedown_timestamps := maps:put(Node, NewTimestamp, Map)} end). + +set_other_servers(Pid, Servers) -> + sys:replace_state(Pid, fun(#{other_servers := _} = State) -> + State#{other_servers := Servers} + end). From 9dfdb285ac1dc0f0e89526825143e8d87cf7290b Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 15:16:40 +0100 Subject: [PATCH 11/30] Move more cases into cets_disco_SUITE --- test/cets_SUITE.erl | 177 +---------------------------------- test/cets_disco_SUITE.erl | 190 +++++++++++++++++++++++++++++++++++++- 2 files changed, 186 insertions(+), 181 deletions(-) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 33961d0..8081932 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -34,7 +34,6 @@ start/2, start_local/1, start_local/2, - start_disco/2, start_simple_disco/0, make_name/1, make_name/2, @@ -44,11 +43,7 @@ given_two_joined_tables/2, given_3_servers/1, given_3_servers/2, - given_n_servers/3 -]). - --import(cets_test_setup, [ - make_signalling_process/0, + given_n_servers/3, make_process/0 ]). @@ -64,7 +59,6 @@ -import(cets_test_receive, [ receive_message/1, receive_message_with_arg/1, - receive_all_logs/1, assert_nothing_is_logged/2 ]). 
@@ -152,9 +146,6 @@ cases() -> test_multinode, test_multinode_remote_insert, node_list_is_correct, - disco_wait_for_get_nodes_works, - disco_wait_for_get_nodes_blocks_and_returns, - disco_wait_for_get_nodes_when_get_nodes_needs_to_be_retried, get_nodes_request, test_locally, handle_down_is_called, @@ -203,7 +194,6 @@ cases() -> pinfo_returns_undefined, cets_ping_non_existing_node, cets_ping_net_family, - unexpected_nodedown_is_ignored_by_disco, ignore_send_dump_received_when_unpaused, ignore_send_dump_received_when_paused_with_another_pause_ref, pause_on_remote_node_returns_if_monitor_process_dies @@ -229,11 +219,8 @@ seq_cases() -> [ insert_returns_when_netsplit, inserts_after_netsplit_reconnects, - disco_connects_to_unconnected_node, joining_not_fully_connected_node_is_not_allowed, joining_not_fully_connected_node_is_not_allowed2, - %% Cannot be run in parallel with other tests because checks all logging messages. - logging_when_failing_join_with_disco, cets_ping_all_returns_when_ping_crashes, join_interrupted_when_ping_crashes, ping_pairs_returns_pongs, @@ -1474,81 +1461,6 @@ node_list_is_correct(Config) -> [Node1, Node2, Node3] = other_nodes(Node4, Tab), ok. -disco_wait_for_get_nodes_works(_Config) -> - F = fun(State) -> {{ok, []}, State} end, - {ok, Disco} = cets_discovery:start_link(#{ - backend_module => cets_discovery_fun, get_nodes_fn => F - }), - ok = cets_discovery:wait_for_get_nodes(Disco, 5000). - -disco_wait_for_get_nodes_blocks_and_returns(Config) -> - Tab = make_name(Config, 1), - {ok, _Pid} = start_local(Tab, #{}), - SignallingPid = make_signalling_process(), - F = fun(State) -> - wait_for_down(SignallingPid), - {{ok, []}, State} - end, - {ok, Disco} = cets_discovery:start_link(#{ - backend_module => cets_discovery_fun, get_nodes_fn => F - }), - cets_discovery:add_table(Disco, Tab), - %% Enter into a blocking get_nodes function - Disco ! 
check, - %% Do it async, because it would block is - WaitPid = spawn_link(fun() -> ok = cets_discovery:wait_for_get_nodes(Disco, 5000) end), - Cond = fun() -> - length(maps:get(pending_wait_for_get_nodes, cets_discovery:system_info(Disco))) - end, - cets_test_wait:wait_until(Cond, 1), - %% Unblock get_nodes call - SignallingPid ! stop, - %% wait_for_get_nodes returns - wait_for_down(WaitPid), - ok. - -%% Check that wait_for_get_nodes waits in case get_nodes should be retried -disco_wait_for_get_nodes_when_get_nodes_needs_to_be_retried(Config) -> - Me = self(), - Tab = make_name(Config, 1), - {ok, _Pid} = start_local(Tab, #{}), - SignallingPid1 = make_signalling_process(), - SignallingPid2 = make_signalling_process(), - F = fun - (State = #{step := 1}) -> - wait_for_down(SignallingPid1), - {{ok, []}, State#{step => 2}}; - (State = #{step := 2}) -> - Me ! entered_get_nodes2, - wait_for_down(SignallingPid2), - {{ok, []}, State#{step => 2}} - end, - {ok, Disco} = cets_discovery:start_link(#{ - backend_module => cets_discovery_fun, get_nodes_fn => F, step => 1 - }), - cets_discovery:add_table(Disco, Tab), - %% Enter into a blocking get_nodes function - Disco ! check, - %% Do it async, because it would block is - WaitPid = spawn_link(fun() -> ok = cets_discovery:wait_for_get_nodes(Disco, 5000) end), - Cond = fun() -> - length(maps:get(pending_wait_for_get_nodes, cets_discovery:system_info(Disco))) - end, - cets_test_wait:wait_until(Cond, 1), - %% Set should_retry_get_nodes - Disco ! check, - %% Ensure check message is received - cets_discovery:system_info(Disco), - %% Unblock first get_nodes call - SignallingPid1 ! stop, - receive_message(entered_get_nodes2), - %% Still waiting for get_nodes being retried - true = erlang:is_process_alive(WaitPid), - %% It returns finally after second get_nodes call - SignallingPid2 ! stop, - wait_for_down(WaitPid), - ok. 
- get_nodes_request(Config) -> #{ct2 := Node2, ct3 := Node3, ct4 := Node4} = proplists:get_value(nodes, Config), Tab = make_name(Config), @@ -2011,25 +1923,6 @@ inserts_after_netsplit_reconnects(Config) -> [{1, v2}] = dump(Node1, Tab), [{1, v3}] = dump(Peer5, Tab). -disco_connects_to_unconnected_node(Config) -> - Node1 = node(), - #{ct5 := Peer5} = proplists:get_value(peers, Config), - #{ct5 := Node5} = proplists:get_value(nodes, Config), - ok = net_kernel:monitor_nodes(true), - disconnect_node(Peer5, Node1), - receive_message({nodedown, Node5}), - Tab = make_name(Config), - {ok, _} = start(Node1, Tab), - {ok, _} = start(Peer5, Tab), - F = fun(State) -> - {{ok, [Node1, Node5]}, State} - end, - {ok, Disco} = cets_discovery:start_link(#{ - backend_module => cets_discovery_fun, get_nodes_fn => F - }), - cets_discovery:add_table(Disco, Tab), - ok = wait_for_ready(Disco, 5000). - %% Joins from a bad (not fully connected) node %% Join process should check if nodes could contact each other before allowing to join joining_not_fully_connected_node_is_not_allowed(Config) -> @@ -2090,60 +1983,6 @@ joining_not_fully_connected_node_is_not_allowed2(Config) -> end, [] = cets:other_pids(Pid5). -logging_when_failing_join_with_disco(Config) -> - %% Simulate cets:other_pids/1 failing with reason: - %% {{nodedown,'mongooseim@mongooseim-1.mongooseim.default.svc.cluster.local'}, - %% {gen_server,call,[<30887.438.0>,other_servers,infinity]}} - %% We use peer module to still have a connection after a disconnect from the remote node. 
- logger_debug_h:start(#{id => ?FUNCTION_NAME}), - Node1 = node(), - #{ct2 := Peer2} = proplists:get_value(peers, Config), - #{ct2 := Node2} = proplists:get_value(nodes, Config), - Tab = make_name(Config), - {ok, _Pid1} = start(Node1, Tab), - {ok, Pid2} = start(Peer2, Tab), - meck:new(cets, [passthrough]), - meck:expect(cets, other_pids, fun - (Server) when Server =:= Pid2 -> - block_node(Node2, Peer2), - wait_for_down(Pid2), - meck:passthrough([Server]); - (Server) -> - meck:passthrough([Server]) - end), - F = fun(State) -> - {{ok, [Node1, Node2]}, State} - end, - DiscoName = disco_name(Config), - Disco = start_disco(Node1, #{ - name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F - }), - try - cets_discovery:add_table(Disco, Tab), - timer:sleep(100), - Logs = receive_all_logs(?FUNCTION_NAME), - Reason = {{nodedown, Node2}, {gen_server, call, [Pid2, other_servers, infinity]}}, - MatchedLogs = [ - Log - || #{ - level := error, - msg := - {report, #{ - what := task_failed, - reason := Reason2 - }} - } = Log <- Logs, - Reason =:= Reason2 - ], - %% Only one message is logged - ?assertMatch([_], MatchedLogs, Logs) - after - meck:unload(), - reconnect_node(Node2, Peer2), - cets:stop(Pid2) - end, - ok. - cets_ping_all_returns_when_ping_crashes(Config) -> #{pid1 := Pid1, pid2 := Pid2} = given_two_joined_tables(Config), meck:new(cets, [passthrough]), @@ -2223,20 +2062,6 @@ cets_ping_net_family(_Config) -> inet6 = cets_ping:net_family({ok, [["inet6"]]}), inet6 = cets_ping:net_family({ok, [["inet6_tls"]]}). -unexpected_nodedown_is_ignored_by_disco(Config) -> - %% Theoretically, should not happen - %% Still, check that we do not crash in this case - DiscoName = disco_name(Config), - F = fun(State) -> {{ok, []}, State} end, - Disco = start_disco(node(), #{ - name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F - }), - #{start_time := StartTime} = cets_discovery:system_info(Disco), - Disco ! 
{nodedown, 'cets@badnode'}, - %% Check that we are still running - #{start_time := StartTime} = cets_discovery:system_info(Disco), - ok. - ping_pairs_returns_pongs(Config) -> #{ct2 := Node2, ct3 := Node3} = proplists:get_value(nodes, Config), Me = node(), diff --git a/test/cets_disco_SUITE.erl b/test/cets_disco_SUITE.erl index 583efee..95a1232 100644 --- a/test/cets_disco_SUITE.erl +++ b/test/cets_disco_SUITE.erl @@ -9,12 +9,14 @@ start/2, start_local/1, start_local/2, + start_disco/2, make_name/1, make_name/2, disco_name/1 ]). -import(cets_test_wait, [ + wait_for_down/1, wait_for_ready/2, wait_till_test_stage/2 ]). @@ -22,7 +24,8 @@ -import(cets_test_setup, [ setup_two_nodes_and_discovery/1, setup_two_nodes_and_discovery/2, - simulate_disco_restart/1 + simulate_disco_restart/1, + make_signalling_process/0 ]). -import(cets_test_wait, [ @@ -32,14 +35,21 @@ -import(cets_test_receive, [ receive_message/1, - flush_message/1 + flush_message/1, + receive_all_logs/1 ]). -import(cets_test_peer, [ + block_node/2, + reconnect_node/2, disconnect_node/2, disconnect_node_by_name/2 ]). +-import(cets_test_rpc, [ + rpc/4 +]). + -import(cets_test_helper, [assert_unique/1]). -import(cets_test_rpc, [ @@ -68,6 +78,9 @@ groups() -> cases() -> [ + disco_wait_for_get_nodes_works, + disco_wait_for_get_nodes_blocks_and_returns, + disco_wait_for_get_nodes_when_get_nodes_needs_to_be_retried, test_multinode_auto_discovery, test_disco_add_table, test_disco_delete_table, @@ -82,7 +95,8 @@ cases() -> disco_uses_regular_retry_interval_in_the_regular_phase, disco_uses_regular_retry_interval_in_the_regular_phase_after_node_down, disco_uses_regular_retry_interval_in_the_regular_phase_after_expired_node_down, - disco_handles_node_up_and_down + disco_handles_node_up_and_down, + unexpected_nodedown_is_ignored_by_disco ]. 
seq_cases() -> @@ -96,7 +110,10 @@ seq_cases() -> disco_nodeup_timestamp_is_updated_after_node_reconnects, disco_node_start_timestamp_is_updated_after_node_restarts, disco_late_pang_result_arrives_after_node_went_up, - disco_nodeup_triggers_check_and_get_nodes + disco_nodeup_triggers_check_and_get_nodes, + %% Cannot be run in parallel with other tests because checks all logging messages. + logging_when_failing_join_with_disco, + disco_connects_to_unconnected_node ]. cets_seq_no_log_cases() -> @@ -110,7 +127,7 @@ cets_seq_no_log_cases() -> init_per_suite(Config) -> cets_test_setup:init_cleanup_table(), - cets_test_peer:start([ct2], Config). + cets_test_peer:start([ct2, ct5], Config). end_per_suite(Config) -> cets_test_setup:remove_cleanup_table(), @@ -146,6 +163,81 @@ end_per_testcase(_, _Config) -> log_modules() -> [cets, cets_call, cets_long, cets_join, cets_discovery]. +disco_wait_for_get_nodes_works(_Config) -> + F = fun(State) -> {{ok, []}, State} end, + {ok, Disco} = cets_discovery:start_link(#{ + backend_module => cets_discovery_fun, get_nodes_fn => F + }), + ok = cets_discovery:wait_for_get_nodes(Disco, 5000). + +disco_wait_for_get_nodes_blocks_and_returns(Config) -> + Tab = make_name(Config, 1), + {ok, _Pid} = start_local(Tab, #{}), + SignallingPid = make_signalling_process(), + F = fun(State) -> + wait_for_down(SignallingPid), + {{ok, []}, State} + end, + {ok, Disco} = cets_discovery:start_link(#{ + backend_module => cets_discovery_fun, get_nodes_fn => F + }), + cets_discovery:add_table(Disco, Tab), + %% Enter into a blocking get_nodes function + Disco ! check, + %% Do it async, because it would block is + WaitPid = spawn_link(fun() -> ok = cets_discovery:wait_for_get_nodes(Disco, 5000) end), + Cond = fun() -> + length(maps:get(pending_wait_for_get_nodes, cets_discovery:system_info(Disco))) + end, + cets_test_wait:wait_until(Cond, 1), + %% Unblock get_nodes call + SignallingPid ! stop, + %% wait_for_get_nodes returns + wait_for_down(WaitPid), + ok. 
+ +%% Check that wait_for_get_nodes waits in case get_nodes should be retried +disco_wait_for_get_nodes_when_get_nodes_needs_to_be_retried(Config) -> + Me = self(), + Tab = make_name(Config, 1), + {ok, _Pid} = start_local(Tab, #{}), + SignallingPid1 = make_signalling_process(), + SignallingPid2 = make_signalling_process(), + F = fun + (State = #{step := 1}) -> + wait_for_down(SignallingPid1), + {{ok, []}, State#{step => 2}}; + (State = #{step := 2}) -> + Me ! entered_get_nodes2, + wait_for_down(SignallingPid2), + {{ok, []}, State#{step => 2}} + end, + {ok, Disco} = cets_discovery:start_link(#{ + backend_module => cets_discovery_fun, get_nodes_fn => F, step => 1 + }), + cets_discovery:add_table(Disco, Tab), + %% Enter into a blocking get_nodes function + Disco ! check, + %% Do it async, because it would block is + WaitPid = spawn_link(fun() -> ok = cets_discovery:wait_for_get_nodes(Disco, 5000) end), + Cond = fun() -> + length(maps:get(pending_wait_for_get_nodes, cets_discovery:system_info(Disco))) + end, + cets_test_wait:wait_until(Cond, 1), + %% Set should_retry_get_nodes + Disco ! check, + %% Ensure check message is received + cets_discovery:system_info(Disco), + %% Unblock first get_nodes call + SignallingPid1 ! stop, + receive_message(entered_get_nodes2), + %% Still waiting for get_nodes being retried + true = erlang:is_process_alive(WaitPid), + %% It returns finally after second get_nodes call + SignallingPid2 ! stop, + wait_for_down(WaitPid), + ok. + test_multinode_auto_discovery(Config) -> Node1 = node(), #{ct2 := Node2} = proplists:get_value(nodes, Config), @@ -423,6 +515,20 @@ disco_handles_node_up_and_down(Config) -> %% Check that wait_for_ready still works ok = wait_for_ready(Disco, 5000). 
+unexpected_nodedown_is_ignored_by_disco(Config) -> + %% Theoretically, should not happen + %% Still, check that we do not crash in this case + DiscoName = disco_name(Config), + F = fun(State) -> {{ok, []}, State} end, + Disco = start_disco(node(), #{ + name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F + }), + #{start_time := StartTime} = cets_discovery:system_info(Disco), + Disco ! {nodedown, 'cets@badnode'}, + %% Check that we are still running + #{start_time := StartTime} = cets_discovery:system_info(Disco), + ok. + disco_logs_nodeup(Config) -> logger_debug_h:start(#{id => ?FUNCTION_NAME}), #{disco := Disco, node2 := Node2} = setup_two_nodes_and_discovery(Config), @@ -574,3 +680,77 @@ disco_nodeup_triggers_check_and_get_nodes(Config) -> flush_message(get_nodes), Disco ! {nodeup, Node2}, receive_message(get_nodes). + +disco_connects_to_unconnected_node(Config) -> + Node1 = node(), + #{ct5 := Peer5} = proplists:get_value(peers, Config), + #{ct5 := Node5} = proplists:get_value(nodes, Config), + disconnect_node(Peer5, Node1), + cets_test_wait:wait_until( + fun() -> lists:member(node(), rpc(Peer5, erlang, nodes, [])) end, false + ), + Tab = make_name(Config), + {ok, _} = start(Node1, Tab), + {ok, _} = start(Peer5, Tab), + F = fun(State) -> + {{ok, [Node1, Node5]}, State} + end, + {ok, Disco} = cets_discovery:start_link(#{ + backend_module => cets_discovery_fun, get_nodes_fn => F + }), + cets_discovery:add_table(Disco, Tab), + ok = wait_for_ready(Disco, 5000). + +logging_when_failing_join_with_disco(Config) -> + %% Simulate cets:other_pids/1 failing with reason: + %% {{nodedown,'mongooseim@mongooseim-1.mongooseim.default.svc.cluster.local'}, + %% {gen_server,call,[<30887.438.0>,other_servers,infinity]}} + %% We use peer module to still have a connection after a disconnect from the remote node. 
+ logger_debug_h:start(#{id => ?FUNCTION_NAME}), + Node1 = node(), + #{ct2 := Peer2} = proplists:get_value(peers, Config), + #{ct2 := Node2} = proplists:get_value(nodes, Config), + Tab = make_name(Config), + {ok, _Pid1} = start(Node1, Tab), + {ok, Pid2} = start(Peer2, Tab), + meck:new(cets, [passthrough]), + meck:expect(cets, other_pids, fun + (Server) when Server =:= Pid2 -> + block_node(Node2, Peer2), + wait_for_down(Pid2), + meck:passthrough([Server]); + (Server) -> + meck:passthrough([Server]) + end), + F = fun(State) -> + {{ok, [Node1, Node2]}, State} + end, + DiscoName = disco_name(Config), + Disco = start_disco(Node1, #{ + name => DiscoName, backend_module => cets_discovery_fun, get_nodes_fn => F + }), + try + cets_discovery:add_table(Disco, Tab), + timer:sleep(100), + Logs = receive_all_logs(?FUNCTION_NAME), + Reason = {{nodedown, Node2}, {gen_server, call, [Pid2, other_servers, infinity]}}, + MatchedLogs = [ + Log + || #{ + level := error, + msg := + {report, #{ + what := task_failed, + reason := Reason2 + }} + } = Log <- Logs, + Reason =:= Reason2 + ], + %% Only one message is logged + ?assertMatch([_], MatchedLogs, Logs) + after + meck:unload(), + reconnect_node(Node2, Peer2), + cets:stop(Pid2) + end, + ok. From 9c9ff72f393e29467d1881c933a9216de1212322 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 15:22:28 +0100 Subject: [PATCH 12/30] Move receive_all_logs/ssert_nothing_is_logged into cets_test_log --- test/cets_SUITE.erl | 5 ++--- test/cets_disco_SUITE.erl | 5 ++--- test/cets_test_log.erl | 23 ++++++++++++++++++++++- test/cets_test_receive.erl | 23 +---------------------- 4 files changed, 27 insertions(+), 29 deletions(-) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 8081932..47ea6dd 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -58,8 +58,7 @@ -import(cets_test_receive, [ receive_message/1, - receive_message_with_arg/1, - assert_nothing_is_logged/2 + receive_message_with_arg/1 ]). 
-import(cets_test_helper, [ @@ -1053,7 +1052,7 @@ join_done_already_while_waiting_for_lock_so_do_nothing(Config) -> %% Ensure there is nothing logged, we use log_ref to ignore logs from other tests. %% The counter example for no logging is %% the logs_are_printed_when_join_fails_because_servers_overlap testcase. - assert_nothing_is_logged(?FUNCTION_NAME, LogRef). + cets_test_log:assert_nothing_is_logged(?FUNCTION_NAME, LogRef). pause_owner_crashed_is_logged(Config) -> ct:timetrap({seconds, 6}), diff --git a/test/cets_disco_SUITE.erl b/test/cets_disco_SUITE.erl index 95a1232..357ef48 100644 --- a/test/cets_disco_SUITE.erl +++ b/test/cets_disco_SUITE.erl @@ -35,8 +35,7 @@ -import(cets_test_receive, [ receive_message/1, - flush_message/1, - receive_all_logs/1 + flush_message/1 ]). -import(cets_test_peer, [ @@ -732,7 +731,7 @@ logging_when_failing_join_with_disco(Config) -> try cets_discovery:add_table(Disco, Tab), timer:sleep(100), - Logs = receive_all_logs(?FUNCTION_NAME), + Logs = cets_test_log:receive_all_logs(?FUNCTION_NAME), Reason = {{nodedown, Node2}, {gen_server, call, [Pid2, other_servers, infinity]}}, MatchedLogs = [ Log diff --git a/test/cets_test_log.erl b/test/cets_test_log.erl index fe5277b..8369a13 100644 --- a/test/cets_test_log.erl +++ b/test/cets_test_log.erl @@ -2,7 +2,9 @@ -module(cets_test_log). -export([ receive_all_logs_with_log_ref/2, - receive_all_logs_from_pid/2 + receive_all_logs_from_pid/2, + receive_all_logs/1, + assert_nothing_is_logged/2 ]). -include_lib("kernel/include/logger.hrl"). @@ -61,3 +63,22 @@ ensure_logger_is_working(LogHandlerId, LogRef) -> after 5000 -> ct:fail({timeout, logger_is_broken}) end. + +receive_all_logs(Id) -> + receive + {log, Id, Log} -> + [Log | receive_all_logs(Id)] + after 100 -> + [] + end. 
+ +assert_nothing_is_logged(LogHandlerId, LogRef) -> + receive + {log, LogHandlerId, #{ + level := Level, + msg := {report, #{log_ref := LogRef}} + }} when Level =:= warning; Level =:= error -> + ct:fail(got_logging_but_should_not) + after 0 -> + ok + end. diff --git a/test/cets_test_receive.erl b/test/cets_test_receive.erl index 6fd2116..c84e869 100644 --- a/test/cets_test_receive.erl +++ b/test/cets_test_receive.erl @@ -2,9 +2,7 @@ -export([ receive_message/1, receive_message_with_arg/1, - flush_message/1, - receive_all_logs/1, - assert_nothing_is_logged/2 + flush_message/1 ]). receive_message(M) -> @@ -26,22 +24,3 @@ flush_message(M) -> after 0 -> ok end. - -receive_all_logs(Id) -> - receive - {log, Id, Log} -> - [Log | receive_all_logs(Id)] - after 100 -> - [] - end. - -assert_nothing_is_logged(LogHandlerId, LogRef) -> - receive - {log, LogHandlerId, #{ - level := Level, - msg := {report, #{log_ref := LogRef}} - }} when Level =:= warning; Level =:= error -> - ct:fail(got_logging_but_should_not) - after 0 -> - ok - end. 
From 3f9e50a2e22fd9a9037b697e8b26340c9cdce09b Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 15:29:00 +0100 Subject: [PATCH 13/30] Move unknown_message_is_ignored_in_disco_process/code_change_returns_ok_for_disco into cets_disco_SUITE --- test/cets_SUITE.erl | 15 --------------- test/cets_disco_SUITE.erl | 16 +++++++++++++++- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 47ea6dd..90816cf 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -34,11 +34,9 @@ start/2, start_local/1, start_local/2, - start_simple_disco/0, make_name/1, make_name/2, lock_name/1, - disco_name/1, given_two_joined_tables/1, given_two_joined_tables/2, given_3_servers/1, @@ -180,10 +178,8 @@ cases() -> unknown_message_is_ignored_in_ack_process, unknown_cast_message_is_ignored_in_ack_process, unknown_call_returns_error_from_ack_process, - unknown_message_is_ignored_in_disco_process, code_change_returns_ok, code_change_returns_ok_for_ack, - code_change_returns_ok_for_disco, run_spawn_forwards_errors, run_tracked_failed, run_tracked_logged, @@ -1759,11 +1755,6 @@ unknown_message_is_ignored_in_ack_process(Config) -> AckPid ! oops, still_works(Pid). -unknown_message_is_ignored_in_disco_process(_Config) -> - Pid = start_simple_disco(), - Pid ! oops, - #{} = sys:get_state(Pid). - unknown_cast_message_is_ignored_in_ack_process(Config) -> {ok, Pid} = start_local(make_name(Config)), #{ack_pid := AckPid} = cets:info(Pid), @@ -1789,12 +1780,6 @@ code_change_returns_ok_for_ack(Config) -> ok = sys:change_code(AckPid, cets_ack, v2, []), sys:resume(AckPid). -code_change_returns_ok_for_disco(_Config) -> - Pid = start_simple_disco(), - sys:suspend(Pid), - ok = sys:change_code(Pid, cets_ack, v2, []), - sys:resume(Pid). 
- run_spawn_forwards_errors(_Config) -> ?assertException( error, diff --git a/test/cets_disco_SUITE.erl b/test/cets_disco_SUITE.erl index 357ef48..08b410d 100644 --- a/test/cets_disco_SUITE.erl +++ b/test/cets_disco_SUITE.erl @@ -10,6 +10,7 @@ start_local/1, start_local/2, start_disco/2, + start_simple_disco/0, make_name/1, make_name/2, disco_name/1 @@ -95,7 +96,9 @@ cases() -> disco_uses_regular_retry_interval_in_the_regular_phase_after_node_down, disco_uses_regular_retry_interval_in_the_regular_phase_after_expired_node_down, disco_handles_node_up_and_down, - unexpected_nodedown_is_ignored_by_disco + unexpected_nodedown_is_ignored_by_disco, + unknown_message_is_ignored_in_disco_process, + code_change_returns_ok_for_disco ]. seq_cases() -> @@ -753,3 +756,14 @@ logging_when_failing_join_with_disco(Config) -> cets:stop(Pid2) end, ok. + +unknown_message_is_ignored_in_disco_process(_Config) -> + Pid = start_simple_disco(), + Pid ! oops, + #{} = sys:get_state(Pid). + +code_change_returns_ok_for_disco(_Config) -> + Pid = start_simple_disco(), + sys:suspend(Pid), + ok = sys:change_code(Pid, cets_ack, v2, []), + sys:resume(Pid). From 79bea515c51730ca101eabf37f461b0597b3d626 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 15:33:04 +0100 Subject: [PATCH 14/30] Cleanup cets_status_SUITE --- test/cets_status_SUITE.erl | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/test/cets_status_SUITE.erl b/test/cets_status_SUITE.erl index fd6ccd5..a6bb648 100644 --- a/test/cets_status_SUITE.erl +++ b/test/cets_status_SUITE.erl @@ -53,21 +53,12 @@ all() -> [ {group, cets} - % {group, cets_seq}, - % {group, cets_seq_no_log} ]. groups() -> %% Cases should have unique names, because we name CETS servers based on case names [ - {cets, [parallel, {repeat_until_any_fail, 3}], assert_unique(cases())}, - %% These tests actually simulate a netsplit on the distribution level. 
- %% Though, global's prevent_overlapping_partitions option starts kicking - %% all nodes from the cluster, so we have to be careful not to break other cases. - %% Setting prevent_overlapping_partitions=false on ct5 helps. - {cets_seq, [sequence, {repeat_until_any_fail, 2}], assert_unique(seq_cases())}, - {cets_seq_no_log, [sequence, {repeat_until_any_fail, 2}], - assert_unique(cets_seq_no_log_cases())} + {cets, [parallel, {repeat_until_any_fail, 3}], assert_unique(cases())} ]. cases() -> @@ -86,12 +77,6 @@ cases() -> format_data_does_not_return_table_duplicates ]. -seq_cases() -> - []. - -cets_seq_no_log_cases() -> - []. - init_per_suite(Config) -> cets_test_setup:init_cleanup_table(), cets_test_peer:start([ct2], Config). From 9f4db45a165a7cb44917656404b255bf0d46bb10 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 16:18:23 +0100 Subject: [PATCH 15/30] Use cets_test_peer:start/2 everywhere --- test/cets_SUITE.erl | 9 ++------- test/cets_dist_blocker_SUITE.erl | 21 ++++++++------------- test/cets_test_peer.erl | 7 +++---- 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 90816cf..518af65 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -237,16 +237,11 @@ cets_seq_no_log_cases() -> init_per_suite(Config) -> cets_test_setup:init_cleanup_table(), - Names = [ct2, ct3, ct4, ct5, ct6, ct7], - {Nodes, Peers} = lists:unzip([cets_test_peer:start_node(N) || N <- Names]), - [ - {nodes, maps:from_list(lists:zip(Names, Nodes))}, - {peers, maps:from_list(lists:zip(Names, Peers))} - | Config - ]. + cets_test_peer:start([ct2, ct3, ct4, ct5, ct6, ct7], Config). end_per_suite(Config) -> cets_test_setup:remove_cleanup_table(), + cets_test_peer:stop(Config), Config. 
init_per_group(Group, Config) when Group == cets_seq_no_log; Group == cets_no_log -> diff --git a/test/cets_dist_blocker_SUITE.erl b/test/cets_dist_blocker_SUITE.erl index 5353cbb..9c4f882 100644 --- a/test/cets_dist_blocker_SUITE.erl +++ b/test/cets_dist_blocker_SUITE.erl @@ -35,15 +35,10 @@ unknown_cases() -> ]. init_per_suite(Config) -> - Names = [peer_ct2], - {Nodes, Peers} = lists:unzip([cets_test_peer:start_node(N) || N <- Names]), - [ - {nodes, maps:from_list(lists:zip(Names, Nodes))}, - {peers, maps:from_list(lists:zip(Names, Peers))} - | Config - ]. + cets_test_peer:start([ct2], Config). end_per_suite(Config) -> + cets_test_peer:stop(Config), Config. init_per_group(_Group, Config) -> @@ -64,7 +59,7 @@ end_per_testcase(_, _Config) -> %% Test blocking functionality waits_for_cleaning(Config) -> - #{peer_ct2 := Node2} = proplists:get_value(nodes, Config), + #{ct2 := Node2} = proplists:get_value(nodes, Config), {ok, Blocker} = cets_dist_blocker:start_link(), cets_dist_blocker:add_cleaner(self()), connect_and_disconnect(Node2), @@ -75,7 +70,7 @@ waits_for_cleaning(Config) -> gen_server:stop(Blocker). unblocks_if_cleaner_goes_down(Config) -> - #{peer_ct2 := Node2} = proplists:get_value(nodes, Config), + #{ct2 := Node2} = proplists:get_value(nodes, Config), {ok, Blocker} = cets_dist_blocker:start_link(), Cleaner = spawn_cleaner(), connect_and_disconnect(Node2), @@ -86,7 +81,7 @@ unblocks_if_cleaner_goes_down(Config) -> gen_server:stop(Blocker). unblocks_if_cleaner_goes_down_and_second_cleaner_says_done(Config) -> - #{peer_ct2 := Node2} = proplists:get_value(nodes, Config), + #{ct2 := Node2} = proplists:get_value(nodes, Config), {ok, Blocker} = cets_dist_blocker:start_link(), %% Two cleaners cets_dist_blocker:add_cleaner(self()), @@ -101,7 +96,7 @@ unblocks_if_cleaner_goes_down_and_second_cleaner_says_done(Config) -> gen_server:stop(Blocker). 
unblocks_if_cleaner_says_done_and_second_cleaner_goes_down(Config) -> - #{peer_ct2 := Node2} = proplists:get_value(nodes, Config), + #{ct2 := Node2} = proplists:get_value(nodes, Config), {ok, Blocker} = cets_dist_blocker:start_link(), %% Two cleaners cets_dist_blocker:add_cleaner(self()), @@ -117,7 +112,7 @@ unblocks_if_cleaner_says_done_and_second_cleaner_goes_down(Config) -> gen_server:stop(Blocker). blocks_if_cleaner_says_done_and_second_cleaner_does_not_ack(Config) -> - #{peer_ct2 := Node2} = proplists:get_value(nodes, Config), + #{ct2 := Node2} = proplists:get_value(nodes, Config), {ok, Blocker} = cets_dist_blocker:start_link(), %% Two cleaners cets_dist_blocker:add_cleaner(self()), @@ -131,7 +126,7 @@ blocks_if_cleaner_says_done_and_second_cleaner_does_not_ack(Config) -> gen_server:stop(Blocker). skip_blocking_if_no_cleaners(Config) -> - #{peer_ct2 := Node2} = proplists:get_value(nodes, Config), + #{ct2 := Node2} = proplists:get_value(nodes, Config), {ok, Blocker} = cets_dist_blocker:start_link(), pong = net_adm:ping(Node2), true = erlang:disconnect_node(Node2), diff --git a/test/cets_test_peer.erl b/test/cets_test_peer.erl index d821a67..632efbe 100644 --- a/test/cets_test_peer.erl +++ b/test/cets_test_peer.erl @@ -2,7 +2,6 @@ -export([ start/2, stop/1, - start_node/1, node_to_peer/1 ]). @@ -18,7 +17,7 @@ -include_lib("common_test/include/ct.hrl"). start(Names, Config) -> - {Nodes, Peers} = lists:unzip([cets_test_peer:start_node(name(N)) || N <- Names]), + {Nodes, Peers} = lists:unzip([start_node(N) || N <- Names]), [ {nodes, maps:from_list(lists:zip(Names, Nodes))}, {peers, maps:from_list(lists:zip(Names, Peers))} @@ -33,9 +32,9 @@ stop(Config) -> name(Node) -> list_to_atom(peer:random_name(atom_to_list(Node))). 
-start_node(Sname) -> +start_node(Id) -> {ok, Peer, Node} = ?CT_PEER(#{ - name => Sname, connection => standard_io, args => extra_args(Sname) + name => name(Id), connection => standard_io, args => extra_args(Id) }), %% Register so we can find Peer process later in code register(node_to_peer_name(Node), Peer), From a87e6d537259c321501f60485ba682dacfd7de9f Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 16:44:20 +0100 Subject: [PATCH 16/30] Make cets_join_SUITE --- test/cets_SUITE.erl | 217 ------------------------- test/cets_join_SUITE.erl | 337 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 337 insertions(+), 217 deletions(-) create mode 100644 test/cets_join_SUITE.erl diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 518af65..e4a8683 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -97,14 +97,7 @@ cases() -> insert_many_with_two_records, delete_works, delete_many_works, - join_works, inserted_records_could_be_read_back_from_replicated_table, - join_works_with_existing_data, - join_works_with_existing_data_with_conflicts, - join_works_with_existing_data_with_conflicts_and_defined_conflict_handler, - join_works_with_existing_data_with_conflicts_and_defined_conflict_handler_and_more_keys, - join_works_with_existing_data_with_conflicts_and_defined_conflict_handler_and_keypos2, - bag_with_conflict_handler_not_allowed, bag_with_conflict_handler_not_allowed_for_start_link, insert_new_works, insert_new_works_with_table_name, @@ -125,12 +118,6 @@ cases() -> insert_serial_works_when_leader_is_back, insert_serial_blocks_when_leader_is_not_back, leader_is_the_same_in_metadata_after_join, - join_with_the_same_pid, - join_ref_is_same_after_join, - join_fails_because_server_process_not_found, - join_fails_because_server_process_not_found_before_get_pids, - join_fails_before_send_dump, - join_fails_before_send_dump_and_there_are_pending_remote_ops, send_dump_fails_during_join_because_receiver_exits, 
join_fails_in_check_fully_connected, join_fails_because_join_refs_do_not_match_for_nodes_in_segment, @@ -308,9 +295,6 @@ delete_many_works(Config) -> cets:delete_many(Tab, [alice]), [] = ets:lookup(Tab, alice). -join_works(Config) -> - given_two_joined_tables(Config). - inserted_records_could_be_read_back_from_replicated_table(Config) -> #{tab1 := Tab1, tab2 := Tab2} = given_two_joined_tables(Config), cets:insert(Tab1, {alice, 32}), @@ -662,207 +646,6 @@ leader_is_the_same_in_metadata_after_join(Config) -> Leader = cets_metadata:get(T1, leader), Leader = cets_metadata:get(T2, leader). -join_works_with_existing_data(Config) -> - Tab1 = make_name(Config, 1), - Tab2 = make_name(Config, 2), - {ok, Pid1} = start_local(Tab1), - {ok, Pid2} = start_local(Tab2), - cets:insert(Tab1, {alice, 32}), - %% Join will copy and merge existing tables - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2), - [{alice, 32}] = ets:lookup(Tab2, alice). - -%% This testcase tests an edgecase: inserting with the same key from two nodes. -%% Usually, inserting with the same key from two different nodes is not possible -%% (because the node-name is a part of the key). -join_works_with_existing_data_with_conflicts(Config) -> - Tab1 = make_name(Config, 1), - Tab2 = make_name(Config, 2), - {ok, Pid1} = start_local(Tab1), - {ok, Pid2} = start_local(Tab2), - cets:insert(Tab1, {alice, 32}), - cets:insert(Tab2, {alice, 33}), - %% Join will copy and merge existing tables - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2), - %% We insert data from other table into our table when merging, so the values get swapped - [{alice, 33}] = ets:lookup(Tab1, alice), - [{alice, 32}] = ets:lookup(Tab2, alice). 
- -join_works_with_existing_data_with_conflicts_and_defined_conflict_handler(Config) -> - Opts = #{handle_conflict => fun resolve_highest/2}, - Tab1 = make_name(Config, 1), - Tab2 = make_name(Config, 2), - {ok, Pid1} = start_local(Tab1, Opts), - {ok, Pid2} = start_local(Tab2, Opts), - cets:insert(Tab1, {alice, 32}), - cets:insert(Tab2, {alice, 33}), - %% Join will copy and merge existing tables - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2), - %% Key with the highest Number remains - [{alice, 33}] = ets:lookup(Tab1, alice), - [{alice, 33}] = ets:lookup(Tab2, alice). - -join_works_with_existing_data_with_conflicts_and_defined_conflict_handler_and_more_keys(Config) -> - %% Deeper testing of cets_join:apply_resolver function - Opts = #{handle_conflict => fun resolve_highest/2}, - #{tabs := [T1, T2, T3], pids := [Pid1, Pid2, Pid3]} = given_3_servers(Config, Opts), - cets:insert_many(T1, [{alice, 32}, {bob, 10}, {michal, 40}]), - cets:insert_many(T2, [{alice, 33}, {kate, 3}, {michal, 2}]), - %% Join will copy and merge existing tables - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2), - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid3), - %% Key with the highest Number remains - Dump = [{alice, 33}, {bob, 10}, {kate, 3}, {michal, 40}], - Dump = cets:dump(T1), - Dump = cets:dump(T2), - Dump = cets:dump(T3). - --record(user, {name, age, updated}). 
- -%% Test with records (which require keypos = 2 option) -join_works_with_existing_data_with_conflicts_and_defined_conflict_handler_and_keypos2(Config) -> - Opts = #{handle_conflict => fun resolve_user_conflict/2, keypos => 2}, - T1 = make_name(Config, 1), - T2 = make_name(Config, 2), - {ok, Pid1} = start_local(T1, Opts), - {ok, Pid2} = start_local(T2, Opts), - cets:insert(T1, #user{name = alice, age = 30, updated = erlang:system_time()}), - cets:insert(T2, #user{name = alice, age = 25, updated = erlang:system_time()}), - %% Join will copy and merge existing tables - ok = cets_join:join(keypos2_lock, #{}, Pid1, Pid2), - %% Last inserted record is in the table - [#user{age = 25}] = ets:lookup(T1, alice), - [#user{age = 25}] = ets:lookup(T2, alice). - -%% Keep record with highest timestamp -resolve_user_conflict(U1 = #user{updated = TS1}, _U2 = #user{updated = TS2}) when - TS1 > TS2 --> - U1; -resolve_user_conflict(_U1, U2) -> - U2. - -resolve_highest({K, A}, {K, B}) -> - {K, max(A, B)}. - -bag_with_conflict_handler_not_allowed(Config) -> - {error, [bag_with_conflict_handler]} = - cets:start(make_name(Config), #{handle_conflict => fun resolve_highest/2, type => bag}). - -bag_with_conflict_handler_not_allowed_for_start_link(Config) -> - {error, [bag_with_conflict_handler]} = - cets:start_link(make_name(Config), #{handle_conflict => fun resolve_highest/2, type => bag}). - -join_with_the_same_pid(Config) -> - Tab = make_name(Config), - {ok, Pid} = start_local(Tab), - %% Just insert something into a table to check later the size - cets:insert(Tab, {1, 1}), - link(Pid), - {error, join_with_the_same_pid} = cets_join:join(lock_name(Config), #{}, Pid, Pid), - Nodes = [node()], - %% The process is still running and no data loss (i.e. size is not zero) - #{nodes := Nodes, size := 1} = cets:info(Pid). 
- -join_ref_is_same_after_join(Config) -> - #{pid1 := Pid1, pid2 := Pid2} = given_two_joined_tables(Config), - #{join_ref := JoinRef} = cets:info(Pid1), - #{join_ref := JoinRef} = cets:info(Pid2). - -join_fails_because_server_process_not_found(Config) -> - {ok, Pid1} = start_local(make_name(Config, 1)), - {ok, Pid2} = start_local(make_name(Config, 2)), - F = fun - (join_start) -> - exit(Pid1, sim_error); - (_) -> - ok - end, - {error, {task_failed, {noproc, {gen_server, call, [Pid1, get_info, infinity]}}, _}} = - cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{checkpoint_handler => F}). - -join_fails_because_server_process_not_found_before_get_pids(Config) -> - {ok, Pid1} = start_local(make_name(Config, 1)), - {ok, Pid2} = start_local(make_name(Config, 2)), - F = fun - (before_get_pids) -> - exit(Pid1, sim_error); - (_) -> - ok - end, - {error, {task_failed, {noproc, {gen_server, call, [Pid1, other_servers, infinity]}}, _}} = - cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{checkpoint_handler => F}). - -join_fails_before_send_dump(Config) -> - Me = self(), - DownFn = fun(#{remote_pid := RemotePid, table := _Tab}) -> - Me ! {down_called, self(), RemotePid} - end, - {ok, Pid1} = start_local(make_name(Config, 1), #{handle_down => DownFn}), - {ok, Pid2} = start_local(make_name(Config, 2), #{}), - cets:insert(Pid1, {1}), - cets:insert(Pid2, {2}), - F = fun - ({before_send_dump, P}) when Pid1 =:= P -> - Me ! 
before_send_dump_called_for_pid1; - ({before_send_dump, P}) when Pid2 =:= P -> - error(sim_error); - (_) -> - ok - end, - ?assertMatch( - {error, {task_failed, sim_error, #{}}}, - cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{checkpoint_handler => F}) - ), - %% Ensure we sent dump to Pid1 - receive_message(before_send_dump_called_for_pid1), - %% Not joined, some data exchanged - cets:ping_all(Pid1), - cets:ping_all(Pid2), - [] = cets:other_pids(Pid1), - [] = cets:other_pids(Pid2), - %% Pid1 applied new version of dump - %% Though, it got disconnected after - {ok, [{1}, {2}]} = cets:remote_dump(Pid1), - %% Pid2 rejected changes - {ok, [{2}]} = cets:remote_dump(Pid2), - receive_message({down_called, Pid1, Pid2}). - -%% Checks that remote ops are dropped if join_ref does not match in the state and in remote_op message -join_fails_before_send_dump_and_there_are_pending_remote_ops(Config) -> - Me = self(), - {ok, Pid1} = start_local(make_name(Config, 1)), - {ok, Pid2} = start_local(make_name(Config, 2)), - F = fun - ({before_send_dump, P}) when Pid1 =:= P -> - Me ! before_send_dump_called_for_pid1; - ({before_send_dump, P}) when Pid2 =:= P -> - sys:suspend(Pid2), - error(sim_error); - (before_unpause) -> - %% Crash in before_unpause, otherwise cets_join will block in cets:unpause/2 - %% (because Pid2 is suspended). - %% Servers would be unpaused automatically though, because cets_join process exits - %% (i.e. 
cets:unpause/2 call is totally optional) - error(sim_error2); - (_) -> - ok - end, - ?assertMatch( - {error, {task_failed, sim_error2, #{}}}, - cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{checkpoint_handler => F}) - ), - %% Ensure we sent dump to Pid1 - receive_message(before_send_dump_called_for_pid1), - cets:insert_request(Pid1, {1}), - %% Check that the remote_op has reached Pid2 message box - cets_test_wait:wait_for_remote_ops_in_the_message_box(Pid2, 1), - sys:resume(Pid2), - %% Wait till remote_op is processed - cets:ping(Pid2), - %% Check that the insert was ignored - {ok, []} = cets:remote_dump(Pid2). - send_dump_fails_during_join_because_receiver_exits(Config) -> Me = self(), DownFn = fun(#{remote_pid := RemotePid, table := _Tab}) -> diff --git a/test/cets_join_SUITE.erl b/test/cets_join_SUITE.erl new file mode 100644 index 0000000..522b125 --- /dev/null +++ b/test/cets_join_SUITE.erl @@ -0,0 +1,337 @@ +-module(cets_join_SUITE). +-include_lib("common_test/include/ct.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include_lib("kernel/include/logger.hrl"). + +-compile([export_all, nowarn_export_all]). + +-import(cets_test_setup, [ + start/2, + start_local/1, + start_local/2, + start_disco/2, + start_simple_disco/0, + make_name/1, + make_name/2, + lock_name/1, + disco_name/1 +]). + +-import(cets_test_wait, [ + wait_for_down/1, + wait_for_ready/2, + wait_till_test_stage/2 +]). + +-import(cets_test_setup, [ + setup_two_nodes_and_discovery/1, + setup_two_nodes_and_discovery/2, + simulate_disco_restart/1, + make_signalling_process/0, + given_two_joined_tables/1, + given_two_joined_tables/2, + given_3_servers/2 +]). + +-import(cets_test_wait, [ + wait_for_disco_timestamp_to_appear/3, + wait_for_disco_timestamp_to_be_updated/4 +]). + +-import(cets_test_receive, [ + receive_message/1, + flush_message/1 +]). + +-import(cets_test_peer, [ + block_node/2, + reconnect_node/2, + disconnect_node/2, + disconnect_node_by_name/2 +]). 
+ +-import(cets_test_rpc, [ + rpc/4 +]). + +-import(cets_test_helper, [assert_unique/1]). + +-import(cets_test_rpc, [ + other_nodes/2 +]). + +all() -> + [ + {group, cets} + % {group, cets_seq}, + % {group, cets_seq_no_log} + ]. + +groups() -> + %% Cases should have unique names, because we name CETS servers based on case names + [ + {cets, [parallel, {repeat_until_any_fail, 3}], assert_unique(cases())}, + %% These tests actually simulate a netsplit on the distribution level. + %% Though, global's prevent_overlapping_partitions option starts kicking + %% all nodes from the cluster, so we have to be careful not to break other cases. + %% Setting prevent_overlapping_partitions=false on ct5 helps. + {cets_seq, [sequence, {repeat_until_any_fail, 2}], assert_unique(seq_cases())}, + {cets_seq_no_log, [sequence, {repeat_until_any_fail, 2}], + assert_unique(cets_seq_no_log_cases())} + ]. + +cases() -> + [ + join_works, + join_works_with_existing_data_with_conflicts_and_defined_conflict_handler, + join_works_with_existing_data_with_conflicts_and_defined_conflict_handler_and_more_keys, + join_works_with_existing_data_with_conflicts_and_defined_conflict_handler_and_keypos2, + bag_with_conflict_handler_not_allowed, + join_with_the_same_pid, + join_ref_is_same_after_join, + join_fails_because_server_process_not_found, + join_fails_because_server_process_not_found_before_get_pids, + join_fails_before_send_dump, + join_fails_before_send_dump_and_there_are_pending_remote_ops + ]. + +seq_cases() -> + []. + +cets_seq_no_log_cases() -> + []. + +init_per_suite(Config) -> + cets_test_setup:init_cleanup_table(), + cets_test_peer:start([ct2, ct5], Config). + +end_per_suite(Config) -> + cets_test_setup:remove_cleanup_table(), + cets_test_peer:stop(Config), + Config. + +init_per_group(Group, Config) when Group == cets_seq_no_log; Group == cets_no_log -> + [ok = logger:set_module_level(M, none) || M <- log_modules()], + Config; +init_per_group(_Group, Config) -> + Config. 
+ +end_per_group(Group, Config) when Group == cets_seq_no_log; Group == cets_no_log -> + [ok = logger:unset_module_level(M) || M <- log_modules()], + Config; +end_per_group(_Group, Config) -> + Config. + +init_per_testcase(Name, Config) -> + init_per_testcase_generic(Name, Config). + +init_per_testcase_generic(Name, Config) -> + [{testcase, Name} | Config]. + +end_per_testcase(_, _Config) -> + cets_test_setup:wait_for_cleanup(), + ok. + +%% Modules that use a multiline LOG_ macro +log_modules() -> + [cets, cets_call, cets_long, cets_join, cets_discovery]. + +join_works(Config) -> + given_two_joined_tables(Config). + +join_works_with_existing_data(Config) -> + Tab1 = make_name(Config, 1), + Tab2 = make_name(Config, 2), + {ok, Pid1} = start_local(Tab1), + {ok, Pid2} = start_local(Tab2), + cets:insert(Tab1, {alice, 32}), + %% Join will copy and merge existing tables + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2), + [{alice, 32}] = ets:lookup(Tab2, alice). + +%% This testcase tests an edgecase: inserting with the same key from two nodes. +%% Usually, inserting with the same key from two different nodes is not possible +%% (because the node-name is a part of the key). +join_works_with_existing_data_with_conflicts(Config) -> + Tab1 = make_name(Config, 1), + Tab2 = make_name(Config, 2), + {ok, Pid1} = start_local(Tab1), + {ok, Pid2} = start_local(Tab2), + cets:insert(Tab1, {alice, 32}), + cets:insert(Tab2, {alice, 33}), + %% Join will copy and merge existing tables + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2), + %% We insert data from other table into our table when merging, so the values get swapped + [{alice, 33}] = ets:lookup(Tab1, alice), + [{alice, 32}] = ets:lookup(Tab2, alice). 
+ +join_works_with_existing_data_with_conflicts_and_defined_conflict_handler(Config) -> + Opts = #{handle_conflict => fun resolve_highest/2}, + Tab1 = make_name(Config, 1), + Tab2 = make_name(Config, 2), + {ok, Pid1} = start_local(Tab1, Opts), + {ok, Pid2} = start_local(Tab2, Opts), + cets:insert(Tab1, {alice, 32}), + cets:insert(Tab2, {alice, 33}), + %% Join will copy and merge existing tables + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2), + %% Key with the highest Number remains + [{alice, 33}] = ets:lookup(Tab1, alice), + [{alice, 33}] = ets:lookup(Tab2, alice). + +join_works_with_existing_data_with_conflicts_and_defined_conflict_handler_and_more_keys(Config) -> + %% Deeper testing of cets_join:apply_resolver function + Opts = #{handle_conflict => fun resolve_highest/2}, + #{tabs := [T1, T2, T3], pids := [Pid1, Pid2, Pid3]} = given_3_servers(Config, Opts), + cets:insert_many(T1, [{alice, 32}, {bob, 10}, {michal, 40}]), + cets:insert_many(T2, [{alice, 33}, {kate, 3}, {michal, 2}]), + %% Join will copy and merge existing tables + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2), + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid3), + %% Key with the highest Number remains + Dump = [{alice, 33}, {bob, 10}, {kate, 3}, {michal, 40}], + Dump = cets:dump(T1), + Dump = cets:dump(T2), + Dump = cets:dump(T3). + +-record(user, {name, age, updated}). 
+ +%% Test with records (which require keypos = 2 option) +join_works_with_existing_data_with_conflicts_and_defined_conflict_handler_and_keypos2(Config) -> + Opts = #{handle_conflict => fun resolve_user_conflict/2, keypos => 2}, + T1 = make_name(Config, 1), + T2 = make_name(Config, 2), + {ok, Pid1} = start_local(T1, Opts), + {ok, Pid2} = start_local(T2, Opts), + cets:insert(T1, #user{name = alice, age = 30, updated = erlang:system_time()}), + cets:insert(T2, #user{name = alice, age = 25, updated = erlang:system_time()}), + %% Join will copy and merge existing tables + ok = cets_join:join(keypos2_lock, #{}, Pid1, Pid2), + %% Last inserted record is in the table + [#user{age = 25}] = ets:lookup(T1, alice), + [#user{age = 25}] = ets:lookup(T2, alice). + +%% Keep record with highest timestamp +resolve_user_conflict(U1 = #user{updated = TS1}, _U2 = #user{updated = TS2}) when + TS1 > TS2 +-> + U1; +resolve_user_conflict(_U1, U2) -> + U2. + +resolve_highest({K, A}, {K, B}) -> + {K, max(A, B)}. + +bag_with_conflict_handler_not_allowed(Config) -> + {error, [bag_with_conflict_handler]} = + cets:start(make_name(Config), #{handle_conflict => fun resolve_highest/2, type => bag}). + +join_with_the_same_pid(Config) -> + Tab = make_name(Config), + {ok, Pid} = start_local(Tab), + %% Just insert something into a table to check later the size + cets:insert(Tab, {1, 1}), + link(Pid), + {error, join_with_the_same_pid} = cets_join:join(lock_name(Config), #{}, Pid, Pid), + Nodes = [node()], + %% The process is still running and no data loss (i.e. size is not zero) + #{nodes := Nodes, size := 1} = cets:info(Pid). + +join_ref_is_same_after_join(Config) -> + #{pid1 := Pid1, pid2 := Pid2} = given_two_joined_tables(Config), + #{join_ref := JoinRef} = cets:info(Pid1), + #{join_ref := JoinRef} = cets:info(Pid2). 
+ +join_fails_because_server_process_not_found(Config) -> + {ok, Pid1} = start_local(make_name(Config, 1)), + {ok, Pid2} = start_local(make_name(Config, 2)), + F = fun + (join_start) -> + exit(Pid1, sim_error); + (_) -> + ok + end, + {error, {task_failed, {noproc, {gen_server, call, [Pid1, get_info, infinity]}}, _}} = + cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{checkpoint_handler => F}). + +join_fails_because_server_process_not_found_before_get_pids(Config) -> + {ok, Pid1} = start_local(make_name(Config, 1)), + {ok, Pid2} = start_local(make_name(Config, 2)), + F = fun + (before_get_pids) -> + exit(Pid1, sim_error); + (_) -> + ok + end, + {error, {task_failed, {noproc, {gen_server, call, [Pid1, other_servers, infinity]}}, _}} = + cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{checkpoint_handler => F}). + +join_fails_before_send_dump(Config) -> + Me = self(), + DownFn = fun(#{remote_pid := RemotePid, table := _Tab}) -> + Me ! {down_called, self(), RemotePid} + end, + {ok, Pid1} = start_local(make_name(Config, 1), #{handle_down => DownFn}), + {ok, Pid2} = start_local(make_name(Config, 2), #{}), + cets:insert(Pid1, {1}), + cets:insert(Pid2, {2}), + F = fun + ({before_send_dump, P}) when Pid1 =:= P -> + Me ! before_send_dump_called_for_pid1; + ({before_send_dump, P}) when Pid2 =:= P -> + error(sim_error); + (_) -> + ok + end, + ?assertMatch( + {error, {task_failed, sim_error, #{}}}, + cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{checkpoint_handler => F}) + ), + %% Ensure we sent dump to Pid1 + receive_message(before_send_dump_called_for_pid1), + %% Not joined, some data exchanged + cets:ping_all(Pid1), + cets:ping_all(Pid2), + [] = cets:other_pids(Pid1), + [] = cets:other_pids(Pid2), + %% Pid1 applied new version of dump + %% Though, it got disconnected after + {ok, [{1}, {2}]} = cets:remote_dump(Pid1), + %% Pid2 rejected changes + {ok, [{2}]} = cets:remote_dump(Pid2), + receive_message({down_called, Pid1, Pid2}). 
+ +%% Checks that remote ops are dropped if join_ref does not match in the state and in remote_op message +join_fails_before_send_dump_and_there_are_pending_remote_ops(Config) -> + Me = self(), + {ok, Pid1} = start_local(make_name(Config, 1)), + {ok, Pid2} = start_local(make_name(Config, 2)), + F = fun + ({before_send_dump, P}) when Pid1 =:= P -> + Me ! before_send_dump_called_for_pid1; + ({before_send_dump, P}) when Pid2 =:= P -> + sys:suspend(Pid2), + error(sim_error); + (before_unpause) -> + %% Crash in before_unpause, otherwise cets_join will block in cets:unpause/2 + %% (because Pid2 is suspended). + %% Servers would be unpaused automatically though, because cets_join process exits + %% (i.e. cets:unpause/2 call is totally optional) + error(sim_error2); + (_) -> + ok + end, + ?assertMatch( + {error, {task_failed, sim_error2, #{}}}, + cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{checkpoint_handler => F}) + ), + %% Ensure we sent dump to Pid1 + receive_message(before_send_dump_called_for_pid1), + cets:insert_request(Pid1, {1}), + %% Check that the remote_op has reached Pid2 message box + cets_test_wait:wait_for_remote_ops_in_the_message_box(Pid2, 1), + sys:resume(Pid2), + %% Wait till remote_op is processed + cets:ping(Pid2), + %% Check that the insert was ignored + {ok, []} = cets:remote_dump(Pid2). 
From 45b1f7fe6db6215f805f2c2196bae5ed27ee34f6 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 16:55:17 +0100 Subject: [PATCH 17/30] Move more tests into cets_join_SUITE --- test/cets_SUITE.erl | 210 +--------------------------------- test/cets_join_SUITE.erl | 230 +++++++++++++++++++++++++++++++++++++- test/cets_test_helper.erl | 6 +- 3 files changed, 232 insertions(+), 214 deletions(-) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index e4a8683..463b773 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -61,7 +61,8 @@ -import(cets_test_helper, [ assert_unique/1, - set_other_servers/2 + set_other_servers/2, + set_join_ref/2 ]). all() -> @@ -118,13 +119,6 @@ cases() -> insert_serial_works_when_leader_is_back, insert_serial_blocks_when_leader_is_not_back, leader_is_the_same_in_metadata_after_join, - send_dump_fails_during_join_because_receiver_exits, - join_fails_in_check_fully_connected, - join_fails_because_join_refs_do_not_match_for_nodes_in_segment, - join_fails_because_pids_do_not_match_for_nodes_in_segment, - join_fails_because_servers_overlap, - remote_ops_are_ignored_if_join_ref_does_not_match, - join_retried_if_lock_is_busy, send_dump_contains_already_added_servers, servers_remove_each_other_if_join_refs_do_not_match_after_unpause, test_multinode, @@ -185,10 +179,8 @@ only_for_logger_cases() -> [ run_tracked_logged_check_logger, long_call_fails_because_linked_process_dies, - logs_are_printed_when_join_fails_because_servers_overlap, pause_owner_crashed_is_logged, pause_owner_crashed_is_not_logged_if_reason_is_normal, - join_done_already_while_waiting_for_lock_so_do_nothing, atom_error_is_logged_in_tracked, shutdown_reason_is_not_logged_in_tracked, other_reason_is_logged_in_tracked, @@ -646,188 +638,6 @@ leader_is_the_same_in_metadata_after_join(Config) -> Leader = cets_metadata:get(T1, leader), Leader = cets_metadata:get(T2, leader). 
-send_dump_fails_during_join_because_receiver_exits(Config) -> - Me = self(), - DownFn = fun(#{remote_pid := RemotePid, table := _Tab}) -> - Me ! {down_called, self(), RemotePid} - end, - {ok, Pid1} = start_local(make_name(Config, 1), #{handle_down => DownFn}), - {ok, Pid2} = start_local(make_name(Config, 2), #{}), - F = fun - ({before_send_dump, P}) when P =:= Pid1 -> - %% Kill Pid2 process. - %% It does not crash the join process. - %% Pid1 would receive a dump with Pid2 in the server list. - exit(Pid2, sim_error), - %% Ensure Pid1 got DOWN message from Pid2 already - pong = cets:ping(Pid1), - Me ! before_send_dump_called; - (_) -> - ok - end, - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{checkpoint_handler => F}), - receive_message(before_send_dump_called), - pong = cets:ping(Pid1), - receive_message({down_called, Pid1, Pid2}), - [] = cets:other_pids(Pid1), - %% Pid1 still works - cets:insert(Pid1, {1}), - {ok, [{1}]} = cets:remote_dump(Pid1). - -join_fails_in_check_fully_connected(Config) -> - Me = self(), - #{pids := [Pid1, Pid2, Pid3]} = given_3_servers(Config), - %% Pid2 and Pid3 are connected - ok = cets_join:join(lock_name(Config), #{}, Pid2, Pid3, #{}), - [Pid3] = cets:other_pids(Pid2), - F = fun - (before_check_fully_connected) -> - %% Ask Pid2 to remove Pid3 from the list - Pid2 ! {'DOWN', make_ref(), process, Pid3, sim_error}, - %% Ensure Pid2 did the cleaning - pong = cets:ping(Pid2), - [] = cets:other_pids(Pid2), - Me ! before_check_fully_connected_called; - (_) -> - ok - end, - ?assertMatch( - {error, {task_failed, check_fully_connected_failed, #{}}}, - cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{checkpoint_handler => F}) - ), - receive_message(before_check_fully_connected_called). 
- -join_fails_because_join_refs_do_not_match_for_nodes_in_segment(Config) -> - #{pids := [Pid1, Pid2, Pid3]} = given_3_servers(Config), - %% Pid2 and Pid3 are connected - %% But for some reason Pid3 has a different join_ref - %% (probably could happen if it still haven't checked other nodes after a join) - ok = cets_join:join(lock_name(Config), #{}, Pid2, Pid3, #{}), - set_join_ref(Pid3, make_ref()), - ?assertMatch( - {error, {task_failed, check_same_join_ref_failed, #{}}}, - cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{}) - ). - -join_fails_because_pids_do_not_match_for_nodes_in_segment(Config) -> - #{pids := [Pid1, Pid2, Pid3]} = given_3_servers(Config), - %% Pid2 and Pid3 are connected - %% But for some reason Pid3 has a different other_nodes list - %% (probably could happen if it still haven't checked other nodes after a join) - ok = cets_join:join(lock_name(Config), #{}, Pid2, Pid3, #{}), - set_other_servers(Pid3, []), - ?assertMatch( - {error, {task_failed, check_fully_connected_failed, #{}}}, - cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{}) - ). - -join_fails_because_servers_overlap(Config) -> - #{pids := [Pid1, Pid2, Pid3]} = given_3_servers(Config), - set_other_servers(Pid1, [Pid3]), - set_other_servers(Pid2, [Pid3]), - ?assertMatch( - {error, {task_failed, check_do_not_overlap_failed, #{}}}, - cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{}) - ). - -%% join_fails_because_servers_overlap testcase, but we check the logging. -%% We check that `?LOG_ERROR(#{what => check_do_not_overlap_failed})' is called. 
-logs_are_printed_when_join_fails_because_servers_overlap(Config) -> - LogRef = make_ref(), - logger_debug_h:start(#{id => ?FUNCTION_NAME}), - #{pids := [Pid1, Pid2, Pid3]} = given_3_servers(Config), - set_other_servers(Pid1, [Pid3]), - set_other_servers(Pid2, [Pid3]), - ?assertMatch( - {error, {task_failed, check_do_not_overlap_failed, #{}}}, - cets_join:join(lock_name(Config), #{log_ref => LogRef}, Pid1, Pid2, #{}) - ), - receive - {log, ?FUNCTION_NAME, #{ - level := error, - msg := - {report, #{ - what := check_do_not_overlap_failed, log_ref := LogRef - }} - }} -> - ok - after 5000 -> - ct:fail(timeout) - end. - -remote_ops_are_ignored_if_join_ref_does_not_match(Config) -> - {ok, Pid1} = start_local(make_name(Config, 1)), - {ok, Pid2} = start_local(make_name(Config, 2)), - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{}), - #{join_ref := JoinRef} = cets:info(Pid1), - set_join_ref(Pid1, make_ref()), - cets:insert(Pid2, {1}), - %% fix and check again - set_join_ref(Pid1, JoinRef), - cets:insert(Pid2, {2}), - {ok, [{2}]} = cets:remote_dump(Pid1). - -join_retried_if_lock_is_busy(Config) -> - Me = self(), - {ok, Pid1} = start_local(make_name(Config, 1)), - {ok, Pid2} = start_local(make_name(Config, 2)), - Lock = lock_name(Config), - SleepyF = fun - (join_start) -> - Me ! join_start, - timer:sleep(infinity); - (_) -> - ok - end, - F = fun - (before_retry) -> Me ! before_retry; - (_) -> ok - end, - %% Get the lock in a separate process - proc_lib:spawn_link(fun() -> - cets_join:join(Lock, #{}, Pid1, Pid2, #{checkpoint_handler => SleepyF}) - end), - receive_message(join_start), - %% We actually would not return from cets_join:join unless we get the lock - proc_lib:spawn_link(fun() -> - ok = cets_join:join(Lock, #{}, Pid1, Pid2, #{checkpoint_handler => F}) - end), - receive_message(before_retry). 
- -join_done_already_while_waiting_for_lock_so_do_nothing(Config) -> - logger_debug_h:start(#{id => ?FUNCTION_NAME}), - Me = self(), - #{pids := [Pid1, Pid2, Pid3, Pid4]} = given_n_servers(Config, 4, #{}), - Lock = lock_name(Config), - ok = cets_join:join(Lock, #{}, Pid1, Pid2, #{}), - ok = cets_join:join(Lock, #{}, Pid3, Pid4, #{}), - %% It is to just match logs - LogRef = make_ref(), - Info = #{log_ref => LogRef}, - F1 = send_join_start_back_and_wait_for_continue_joining(), - F2 = fun(_) -> ok end, - %% Get the lock in a separate process - proc_lib:spawn_link(fun() -> - ok = cets_join:join(Lock, Info, Pid1, Pid3, #{checkpoint_handler => F1}), - Me ! first_join_returns - end), - JoinPid = receive_message_with_arg(join_start), - proc_lib:spawn_link(fun() -> - ok = cets_join:join(Lock, Info, Pid1, Pid3, #{checkpoint_handler => F2}), - Me ! second_join_returns - end), - JoinPid ! continue_joining, - %% At this point our first join would finish, after that our second join should exit too. - receive_message(first_join_returns), - receive_message(second_join_returns), - %% Ensure all logs are received by removing the handler, it is a sync operation. - %% (we do not expect any logs anyway). - logger:remove_handler(?FUNCTION_NAME), - %% Ensure there is nothing logged, we use log_ref to ignore logs from other tests. - %% The counter example for no logging is - %% the logs_are_printed_when_join_fails_because_servers_overlap testcase. - cets_test_log:assert_nothing_is_logged(?FUNCTION_NAME, LogRef). - pause_owner_crashed_is_logged(Config) -> ct:timetrap({seconds, 6}), logger_debug_h:start(#{id => ?FUNCTION_NAME}), @@ -1855,9 +1665,6 @@ start_link_local(Name, Opts) -> schedule_cleanup(Pid), {ok, Pid}. -set_join_ref(Pid, JoinRef) -> - sys:replace_state(Pid, fun(#{join_ref := _} = State) -> State#{join_ref := JoinRef} end). 
- stopped_pid() -> %% Get a pid for a stopped process {Pid, Mon} = spawn_monitor(fun() -> ok end), @@ -1874,19 +1681,6 @@ bad_node_pid_binary() -> <<131, 88, 100, 0, 17, 98, 97, 100, 110, 111, 100, 101, 64, 108, 111, 99, 97, 108, 104, 111, 115, 116, 0, 0, 0, 90, 0, 0, 0, 0, 100, 206, 70, 92>>. -send_join_start_back_and_wait_for_continue_joining() -> - Me = self(), - fun - (join_start) -> - Me ! {join_start, self()}, - receive - continue_joining -> - ok - end; - (_) -> - ok - end. - not_leader(Leader, Other, Leader) -> Other; not_leader(Other, Leader, Leader) -> diff --git a/test/cets_join_SUITE.erl b/test/cets_join_SUITE.erl index 522b125..3408462 100644 --- a/test/cets_join_SUITE.erl +++ b/test/cets_join_SUITE.erl @@ -30,7 +30,9 @@ make_signalling_process/0, given_two_joined_tables/1, given_two_joined_tables/2, - given_3_servers/2 + given_3_servers/1, + given_3_servers/2, + given_n_servers/3 ]). -import(cets_test_wait, [ @@ -40,6 +42,7 @@ -import(cets_test_receive, [ receive_message/1, + receive_message_with_arg/1, flush_message/1 ]). @@ -54,7 +57,11 @@ rpc/4 ]). --import(cets_test_helper, [assert_unique/1]). +-import(cets_test_helper, [ + set_join_ref/2, + set_other_servers/2, + assert_unique/1 +]). -import(cets_test_rpc, [ other_nodes/2 @@ -62,15 +69,24 @@ all() -> [ - {group, cets} + {group, cets}, + {group, cets_no_log} % {group, cets_seq}, % {group, cets_seq_no_log} ]. +only_for_logger_cases() -> + [ + join_done_already_while_waiting_for_lock_so_do_nothing, + logs_are_printed_when_join_fails_because_servers_overlap + ]. + groups() -> %% Cases should have unique names, because we name CETS servers based on case names [ - {cets, [parallel, {repeat_until_any_fail, 3}], assert_unique(cases())}, + {cets, [parallel, {repeat_until_any_fail, 3}], + assert_unique(cases() ++ only_for_logger_cases())}, + {cets_no_log, [parallel], assert_unique(cases())}, %% These tests actually simulate a netsplit on the distribution level. 
%% Though, global's prevent_overlapping_partitions option starts kicking %% all nodes from the cluster, so we have to be careful not to break other cases. @@ -92,7 +108,14 @@ cases() -> join_fails_because_server_process_not_found, join_fails_because_server_process_not_found_before_get_pids, join_fails_before_send_dump, - join_fails_before_send_dump_and_there_are_pending_remote_ops + join_fails_before_send_dump_and_there_are_pending_remote_ops, + send_dump_fails_during_join_because_receiver_exits, + join_fails_in_check_fully_connected, + join_fails_because_join_refs_do_not_match_for_nodes_in_segment, + join_fails_because_pids_do_not_match_for_nodes_in_segment, + join_fails_because_servers_overlap, + remote_ops_are_ignored_if_join_ref_does_not_match, + join_retried_if_lock_is_busy ]. seq_cases() -> @@ -335,3 +358,200 @@ join_fails_before_send_dump_and_there_are_pending_remote_ops(Config) -> cets:ping(Pid2), %% Check that the insert was ignored {ok, []} = cets:remote_dump(Pid2). + +send_dump_fails_during_join_because_receiver_exits(Config) -> + Me = self(), + DownFn = fun(#{remote_pid := RemotePid, table := _Tab}) -> + Me ! {down_called, self(), RemotePid} + end, + {ok, Pid1} = start_local(make_name(Config, 1), #{handle_down => DownFn}), + {ok, Pid2} = start_local(make_name(Config, 2), #{}), + F = fun + ({before_send_dump, P}) when P =:= Pid1 -> + %% Kill Pid2 process. + %% It does not crash the join process. + %% Pid1 would receive a dump with Pid2 in the server list. + exit(Pid2, sim_error), + %% Ensure Pid1 got DOWN message from Pid2 already + pong = cets:ping(Pid1), + Me ! before_send_dump_called; + (_) -> + ok + end, + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{checkpoint_handler => F}), + receive_message(before_send_dump_called), + pong = cets:ping(Pid1), + receive_message({down_called, Pid1, Pid2}), + [] = cets:other_pids(Pid1), + %% Pid1 still works + cets:insert(Pid1, {1}), + {ok, [{1}]} = cets:remote_dump(Pid1). 
+
+join_fails_in_check_fully_connected(Config) ->
+    Me = self(),
+    #{pids := [Pid1, Pid2, Pid3]} = given_3_servers(Config),
+    %% Pid2 and Pid3 are connected
+    ok = cets_join:join(lock_name(Config), #{}, Pid2, Pid3, #{}),
+    [Pid3] = cets:other_pids(Pid2),
+    F = fun
+        (before_check_fully_connected) ->
+            %% Ask Pid2 to remove Pid3 from the list
+            Pid2 ! {'DOWN', make_ref(), process, Pid3, sim_error},
+            %% Ensure Pid2 did the cleaning
+            pong = cets:ping(Pid2),
+            [] = cets:other_pids(Pid2),
+            Me ! before_check_fully_connected_called;
+        (_) ->
+            ok
+    end,
+    ?assertMatch(
+        {error, {task_failed, check_fully_connected_failed, #{}}},
+        cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{checkpoint_handler => F})
+    ),
+    receive_message(before_check_fully_connected_called).
+
+join_fails_because_join_refs_do_not_match_for_nodes_in_segment(Config) ->
+    #{pids := [Pid1, Pid2, Pid3]} = given_3_servers(Config),
+    %% Pid2 and Pid3 are connected
+    %% But for some reason Pid3 has a different join_ref
+    %% (probably could happen if it still hasn't checked other nodes after a join)
+    ok = cets_join:join(lock_name(Config), #{}, Pid2, Pid3, #{}),
+    set_join_ref(Pid3, make_ref()),
+    ?assertMatch(
+        {error, {task_failed, check_same_join_ref_failed, #{}}},
+        cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{})
+    ).
+
+join_fails_because_pids_do_not_match_for_nodes_in_segment(Config) ->
+    #{pids := [Pid1, Pid2, Pid3]} = given_3_servers(Config),
+    %% Pid2 and Pid3 are connected
+    %% But for some reason Pid3 has a different other_nodes list
+    %% (probably could happen if it still hasn't checked other nodes after a join)
+    ok = cets_join:join(lock_name(Config), #{}, Pid2, Pid3, #{}),
+    set_other_servers(Pid3, []),
+    ?assertMatch(
+        {error, {task_failed, check_fully_connected_failed, #{}}},
+        cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{})
+    ).
+ +join_fails_because_servers_overlap(Config) -> + #{pids := [Pid1, Pid2, Pid3]} = given_3_servers(Config), + set_other_servers(Pid1, [Pid3]), + set_other_servers(Pid2, [Pid3]), + ?assertMatch( + {error, {task_failed, check_do_not_overlap_failed, #{}}}, + cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{}) + ). + +%% join_fails_because_servers_overlap testcase, but we check the logging. +%% We check that `?LOG_ERROR(#{what => check_do_not_overlap_failed})' is called. +logs_are_printed_when_join_fails_because_servers_overlap(Config) -> + LogRef = make_ref(), + logger_debug_h:start(#{id => ?FUNCTION_NAME}), + #{pids := [Pid1, Pid2, Pid3]} = given_3_servers(Config), + set_other_servers(Pid1, [Pid3]), + set_other_servers(Pid2, [Pid3]), + ?assertMatch( + {error, {task_failed, check_do_not_overlap_failed, #{}}}, + cets_join:join(lock_name(Config), #{log_ref => LogRef}, Pid1, Pid2, #{}) + ), + receive + {log, ?FUNCTION_NAME, #{ + level := error, + msg := + {report, #{ + what := check_do_not_overlap_failed, log_ref := LogRef + }} + }} -> + ok + after 5000 -> + ct:fail(timeout) + end. + +remote_ops_are_ignored_if_join_ref_does_not_match(Config) -> + {ok, Pid1} = start_local(make_name(Config, 1)), + {ok, Pid2} = start_local(make_name(Config, 2)), + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{}), + #{join_ref := JoinRef} = cets:info(Pid1), + set_join_ref(Pid1, make_ref()), + cets:insert(Pid2, {1}), + %% fix and check again + set_join_ref(Pid1, JoinRef), + cets:insert(Pid2, {2}), + {ok, [{2}]} = cets:remote_dump(Pid1). + +join_retried_if_lock_is_busy(Config) -> + Me = self(), + {ok, Pid1} = start_local(make_name(Config, 1)), + {ok, Pid2} = start_local(make_name(Config, 2)), + Lock = lock_name(Config), + SleepyF = fun + (join_start) -> + Me ! join_start, + timer:sleep(infinity); + (_) -> + ok + end, + F = fun + (before_retry) -> Me ! 
before_retry; + (_) -> ok + end, + %% Get the lock in a separate process + proc_lib:spawn_link(fun() -> + cets_join:join(Lock, #{}, Pid1, Pid2, #{checkpoint_handler => SleepyF}) + end), + receive_message(join_start), + %% We actually would not return from cets_join:join unless we get the lock + proc_lib:spawn_link(fun() -> + ok = cets_join:join(Lock, #{}, Pid1, Pid2, #{checkpoint_handler => F}) + end), + receive_message(before_retry). + +join_done_already_while_waiting_for_lock_so_do_nothing(Config) -> + logger_debug_h:start(#{id => ?FUNCTION_NAME}), + Me = self(), + #{pids := [Pid1, Pid2, Pid3, Pid4]} = given_n_servers(Config, 4, #{}), + Lock = lock_name(Config), + ok = cets_join:join(Lock, #{}, Pid1, Pid2, #{}), + ok = cets_join:join(Lock, #{}, Pid3, Pid4, #{}), + %% It is to just match logs + LogRef = make_ref(), + Info = #{log_ref => LogRef}, + F1 = send_join_start_back_and_wait_for_continue_joining(), + F2 = fun(_) -> ok end, + %% Get the lock in a separate process + proc_lib:spawn_link(fun() -> + ok = cets_join:join(Lock, Info, Pid1, Pid3, #{checkpoint_handler => F1}), + Me ! first_join_returns + end), + JoinPid = receive_message_with_arg(join_start), + proc_lib:spawn_link(fun() -> + ok = cets_join:join(Lock, Info, Pid1, Pid3, #{checkpoint_handler => F2}), + Me ! second_join_returns + end), + JoinPid ! continue_joining, + %% At this point our first join would finish, after that our second join should exit too. + receive_message(first_join_returns), + receive_message(second_join_returns), + %% Ensure all logs are received by removing the handler, it is a sync operation. + %% (we do not expect any logs anyway). + logger:remove_handler(?FUNCTION_NAME), + %% Ensure there is nothing logged, we use log_ref to ignore logs from other tests. + %% The counter example for no logging is + %% the logs_are_printed_when_join_fails_because_servers_overlap testcase. + cets_test_log:assert_nothing_is_logged(?FUNCTION_NAME, LogRef). 
+
+%% Heleprs
+
+send_join_start_back_and_wait_for_continue_joining() ->
+    Me = self(),
+    fun
+        (join_start) ->
+            Me ! {join_start, self()},
+            receive
+                continue_joining ->
+                    ok
+            end;
+        (_) ->
+            ok
+    end.
diff --git a/test/cets_test_helper.erl b/test/cets_test_helper.erl
index fedc8d4..a12acd6 100644
--- a/test/cets_test_helper.erl
+++ b/test/cets_test_helper.erl
@@ -8,7 +8,8 @@
 -export([
     set_nodedown_timestamp/3,
-    set_other_servers/2
+    set_other_servers/2,
+    set_join_ref/2
 ]).
 
 get_disco_timestamp(Disco, MapName, NodeKey) ->
@@ -31,3 +32,6 @@ set_other_servers(Pid, Servers) ->
     sys:replace_state(Pid, fun(#{other_servers := _} = State) ->
         State#{other_servers := Servers}
     end).
+
+set_join_ref(Pid, JoinRef) ->
+    sys:replace_state(Pid, fun(#{join_ref := _} = State) -> State#{join_ref := JoinRef} end).
From fa821f0b3720cb0b48d0f150d47c398309a6f1e1 Mon Sep 17 00:00:00 2001
From: Mikhail Uvarov
Date: Tue, 12 Mar 2024 17:22:28 +0100
Subject: [PATCH 18/30] Move more cases into cets_join_SUITE

---
 test/cets_SUITE.erl      | 62 ----------------------------------
 test/cets_join_SUITE.erl | 73 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 68 insertions(+), 67 deletions(-)

diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl
index 463b773..5ab3237 100644
--- a/test/cets_SUITE.erl
+++ b/test/cets_SUITE.erl
@@ -193,8 +193,6 @@ seq_cases() ->
     [
         insert_returns_when_netsplit,
         inserts_after_netsplit_reconnects,
-        joining_not_fully_connected_node_is_not_allowed,
-        joining_not_fully_connected_node_is_not_allowed2,
         cets_ping_all_returns_when_ping_crashes,
         join_interrupted_when_ping_crashes,
         ping_pairs_returns_pongs,
@@ -1495,66 +1493,6 @@ inserts_after_netsplit_reconnects(Config) ->
     [{1, v2}] = dump(Node1, Tab),
     [{1, v3}] = dump(Peer5, Tab).
-%% Joins from a bad (not fully connected) node -%% Join process should check if nodes could contact each other before allowing to join -joining_not_fully_connected_node_is_not_allowed(Config) -> - #{ct3 := Peer3, ct5 := Peer5} = proplists:get_value(peers, Config), - #{ct5 := Node5} = proplists:get_value(nodes, Config), - Node1 = node(), - Tab = make_name(Config), - {ok, Pid1} = start(Node1, Tab), - {ok, Pid3} = start(Peer3, Tab), - {ok, Pid5} = start(Peer5, Tab), - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid3), - %% No connection between Peer5 and Node1 - block_node(Node5, Peer5), - try - %% Pid5 and Pid3 could contact each other. - %% Pid3 could contact Pid1 (they are joined). - %% But Pid5 cannot contact Pid1. - {error, {task_failed, check_could_reach_each_other_failed, _}} = - rpc(Peer5, cets_join, join, [lock_name(Config), #{}, Pid5, Pid3]), - %% Still connected - cets:insert(Pid1, {r1}), - {ok, [{r1}]} = cets:remote_dump(Pid3), - [Pid3] = cets:other_pids(Pid1), - [Pid1] = cets:other_pids(Pid3) - after - reconnect_node(Node5, Peer5) - end, - [] = cets:other_pids(Pid5). - -%% Joins from a good (fully connected) node -joining_not_fully_connected_node_is_not_allowed2(Config) -> - #{ct3 := Peer3, ct5 := Peer5} = proplists:get_value(peers, Config), - #{ct5 := Node5} = proplists:get_value(nodes, Config), - Node1 = node(), - Tab = make_name(Config), - {ok, Pid1} = start(Node1, Tab), - {ok, Pid3} = start(Peer3, Tab), - {ok, Pid5} = start(Peer5, Tab), - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid3), - %% No connection between Peer5 and Node1 - block_node(Node5, Peer5), - try - %% Pid5 and Pid3 could contact each other. - %% Pid3 could contact Pid1 (they are joined). - %% But Pid5 cannot contact Pid1. 
- {error, {task_failed, check_could_reach_each_other_failed, _}} = rpc( - Peer3, cets_join, join, [ - lock_name(Config), #{}, Pid5, Pid3 - ] - ), - %% Still connected - cets:insert(Pid1, {r1}), - {ok, [{r1}]} = cets:remote_dump(Pid3), - [Pid3] = cets:other_pids(Pid1), - [Pid1] = cets:other_pids(Pid3) - after - reconnect_node(Node5, Peer5) - end, - [] = cets:other_pids(Pid5). - cets_ping_all_returns_when_ping_crashes(Config) -> #{pid1 := Pid1, pid2 := Pid2} = given_two_joined_tables(Config), meck:new(cets, [passthrough]), diff --git a/test/cets_join_SUITE.erl b/test/cets_join_SUITE.erl index 3408462..a81d24e 100644 --- a/test/cets_join_SUITE.erl +++ b/test/cets_join_SUITE.erl @@ -70,8 +70,8 @@ all() -> [ {group, cets}, - {group, cets_no_log} - % {group, cets_seq}, + {group, cets_no_log}, + {group, cets_seq} % {group, cets_seq_no_log} ]. @@ -119,14 +119,17 @@ cases() -> ]. seq_cases() -> - []. + [ + joining_not_fully_connected_node_is_not_allowed, + joining_not_fully_connected_node_is_not_allowed2 + ]. cets_seq_no_log_cases() -> []. init_per_suite(Config) -> cets_test_setup:init_cleanup_table(), - cets_test_peer:start([ct2, ct5], Config). + cets_test_peer:start([ct2, ct3, ct5], Config). end_per_suite(Config) -> cets_test_setup:remove_cleanup_table(), @@ -541,7 +544,67 @@ join_done_already_while_waiting_for_lock_so_do_nothing(Config) -> %% the logs_are_printed_when_join_fails_because_servers_overlap testcase. cets_test_log:assert_nothing_is_logged(?FUNCTION_NAME, LogRef). 
-%% Heleprs +%% Joins from a bad (not fully connected) node +%% Join process should check if nodes could contact each other before allowing to join +joining_not_fully_connected_node_is_not_allowed(Config) -> + #{ct3 := Peer3, ct5 := Peer5} = proplists:get_value(peers, Config), + #{ct5 := Node5} = proplists:get_value(nodes, Config), + Node1 = node(), + Tab = make_name(Config), + {ok, Pid1} = start(Node1, Tab), + {ok, Pid3} = start(Peer3, Tab), + {ok, Pid5} = start(Peer5, Tab), + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid3), + %% No connection between Peer5 and Node1 + block_node(Node5, Peer5), + try + %% Pid5 and Pid3 could contact each other. + %% Pid3 could contact Pid1 (they are joined). + %% But Pid5 cannot contact Pid1. + {error, {task_failed, check_could_reach_each_other_failed, _}} = + rpc(Peer5, cets_join, join, [lock_name(Config), #{}, Pid5, Pid3]), + %% Still connected + cets:insert(Pid1, {r1}), + {ok, [{r1}]} = cets:remote_dump(Pid3), + [Pid3] = cets:other_pids(Pid1), + [Pid1] = cets:other_pids(Pid3) + after + reconnect_node(Node5, Peer5) + end, + [] = cets:other_pids(Pid5). + +%% Joins from a good (fully connected) node +joining_not_fully_connected_node_is_not_allowed2(Config) -> + #{ct3 := Peer3, ct5 := Peer5} = proplists:get_value(peers, Config), + #{ct5 := Node5} = proplists:get_value(nodes, Config), + Node1 = node(), + Tab = make_name(Config), + {ok, Pid1} = start(Node1, Tab), + {ok, Pid3} = start(Peer3, Tab), + {ok, Pid5} = start(Peer5, Tab), + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid3), + %% No connection between Peer5 and Node1 + block_node(Node5, Peer5), + try + %% Pid5 and Pid3 could contact each other. + %% Pid3 could contact Pid1 (they are joined). + %% But Pid5 cannot contact Pid1. 
+ {error, {task_failed, check_could_reach_each_other_failed, _}} = rpc( + Peer3, cets_join, join, [ + lock_name(Config), #{}, Pid5, Pid3 + ] + ), + %% Still connected + cets:insert(Pid1, {r1}), + {ok, [{r1}]} = cets:remote_dump(Pid3), + [Pid3] = cets:other_pids(Pid1), + [Pid1] = cets:other_pids(Pid3) + after + reconnect_node(Node5, Peer5) + end, + [] = cets:other_pids(Pid5). + +%% Helpers send_join_start_back_and_wait_for_continue_joining() -> Me = self(), From 53cda772337b577710eeb5dd4eeaa0ee4acca7ef Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 17:28:38 +0100 Subject: [PATCH 19/30] Move join_interrupted_when_ping_crashes --- test/cets_SUITE.erl | 15 --------------- test/cets_join_SUITE.erl | 24 ++++++++++++++++++++---- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 5ab3237..32c2ad6 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -194,7 +194,6 @@ seq_cases() -> insert_returns_when_netsplit, inserts_after_netsplit_reconnects, cets_ping_all_returns_when_ping_crashes, - join_interrupted_when_ping_crashes, ping_pairs_returns_pongs, ping_pairs_returns_earlier, pre_connect_fails_on_our_node, @@ -206,7 +205,6 @@ seq_cases() -> cets_seq_no_log_cases() -> [ - join_interrupted_when_ping_crashes, node_down_history_is_updated_when_netsplit_happens, send_check_servers_is_called_before_last_server_got_dump, remote_ops_are_not_sent_before_last_server_got_dump @@ -1503,19 +1501,6 @@ cets_ping_all_returns_when_ping_crashes(Config) -> ?assertMatch({error, [{Pid2, {'EXIT', {simulate_crash, _}}}]}, cets:ping_all(Pid1)), meck:unload(). 
-join_interrupted_when_ping_crashes(Config) -> - #{pid1 := Pid1, pid2 := Pid2} = given_two_joined_tables(Config), - Tab3 = make_name(Config, 3), - {ok, Pid3} = start_local(Tab3, #{}), - meck:new(cets, [passthrough]), - meck:expect(cets_call, long_call, fun - (Server, ping) when Server == Pid2 -> error(simulate_crash); - (Server, Msg) -> meck:passthrough([Server, Msg]) - end), - Res = cets_join:join(lock_name(Config), #{}, Pid1, Pid3), - ?assertMatch({error, {task_failed, ping_all_failed, #{}}}, Res), - meck:unload(). - node_down_history_is_updated_when_netsplit_happens(Config) -> %% node_down_history is available in cets:info/1 API. %% It could be used for manual debugging in situations diff --git a/test/cets_join_SUITE.erl b/test/cets_join_SUITE.erl index a81d24e..2f02212 100644 --- a/test/cets_join_SUITE.erl +++ b/test/cets_join_SUITE.erl @@ -71,8 +71,8 @@ all() -> [ {group, cets}, {group, cets_no_log}, - {group, cets_seq} - % {group, cets_seq_no_log} + {group, cets_seq}, + {group, cets_seq_no_log} ]. only_for_logger_cases() -> @@ -121,11 +121,14 @@ cases() -> seq_cases() -> [ joining_not_fully_connected_node_is_not_allowed, - joining_not_fully_connected_node_is_not_allowed2 + joining_not_fully_connected_node_is_not_allowed2, + join_interrupted_when_ping_crashes ]. cets_seq_no_log_cases() -> - []. + [ + join_interrupted_when_ping_crashes + ]. init_per_suite(Config) -> cets_test_setup:init_cleanup_table(), @@ -604,6 +607,19 @@ joining_not_fully_connected_node_is_not_allowed2(Config) -> end, [] = cets:other_pids(Pid5). 
+join_interrupted_when_ping_crashes(Config) -> + #{pid1 := Pid1, pid2 := Pid2} = given_two_joined_tables(Config), + Tab3 = make_name(Config, 3), + {ok, Pid3} = start_local(Tab3, #{}), + meck:new(cets, [passthrough]), + meck:expect(cets_call, long_call, fun + (Server, ping) when Server == Pid2 -> error(simulate_crash); + (Server, Msg) -> meck:passthrough([Server, Msg]) + end), + Res = cets_join:join(lock_name(Config), #{}, Pid1, Pid3), + ?assertMatch({error, {task_failed, ping_all_failed, #{}}}, Res), + meck:unload(). + %% Helpers send_join_start_back_and_wait_for_continue_joining() -> From 19fba2fb667f3403683538ab12a18c5365bc074b Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 12 Mar 2024 17:55:55 +0100 Subject: [PATCH 20/30] Move tests into cets_netsplit_SUITE --- test/cets_SUITE.erl | 131 ------------------ test/cets_netsplit_SUITE.erl | 259 +++++++++++++++++++++++++++++++++++ 2 files changed, 259 insertions(+), 131 deletions(-) create mode 100644 test/cets_netsplit_SUITE.erl diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 32c2ad6..11e7b85 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -168,8 +168,6 @@ cases() -> send_leader_op_throws_noproc, pinfo_returns_value, pinfo_returns_undefined, - cets_ping_non_existing_node, - cets_ping_net_family, ignore_send_dump_received_when_unpaused, ignore_send_dump_received_when_paused_with_another_pause_ref, pause_on_remote_node_returns_if_monitor_process_dies @@ -191,13 +189,6 @@ only_for_logger_cases() -> seq_cases() -> [ - insert_returns_when_netsplit, - inserts_after_netsplit_reconnects, - cets_ping_all_returns_when_ping_crashes, - ping_pairs_returns_pongs, - ping_pairs_returns_earlier, - pre_connect_fails_on_our_node, - pre_connect_fails_on_one_of_the_nodes, send_check_servers_is_called_before_last_server_got_dump, remote_ops_are_not_sent_before_last_server_got_dump, pause_on_remote_node_crashes @@ -205,7 +196,6 @@ seq_cases() -> cets_seq_no_log_cases() -> [ - 
node_down_history_is_updated_when_netsplit_happens, send_check_servers_is_called_before_last_server_got_dump, remote_ops_are_not_sent_before_last_server_got_dump ]. @@ -1449,127 +1439,6 @@ pinfo_returns_value(_Config) -> pinfo_returns_undefined(_Config) -> undefined = cets_long:pinfo(stopped_pid(), messages). -%% Netsplit cases (run in sequence) - -insert_returns_when_netsplit(Config) -> - #{ct5 := Peer5} = proplists:get_value(peers, Config), - #{ct5 := Node5} = proplists:get_value(nodes, Config), - Node1 = node(), - Tab = make_name(Config), - {ok, Pid1} = start(Node1, Tab), - {ok, Pid5} = start(Peer5, Tab), - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid5), - sys:suspend(Pid5), - R = cets:insert_request(Tab, {1, test}), - block_node(Node5, Peer5), - try - {reply, ok} = cets:wait_response(R, 5000) - after - reconnect_node(Node5, Peer5) - end. - -inserts_after_netsplit_reconnects(Config) -> - #{ct5 := Peer5} = proplists:get_value(peers, Config), - #{ct5 := Node5} = proplists:get_value(nodes, Config), - Node1 = node(), - Tab = make_name(Config), - {ok, Pid1} = start(Node1, Tab), - {ok, Pid5} = start(Peer5, Tab), - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid5), - sys:suspend(Pid5), - R = cets:insert_request(Tab, {1, v1}), - block_node(Node5, Peer5), - try - {reply, ok} = cets:wait_response(R, 5000) - after - reconnect_node(Node5, Peer5) - end, - sys:resume(Pid5), - cets:insert(Pid1, {1, v2}), - cets:insert(Pid5, {1, v3}), - %% No automatic recovery - [{1, v2}] = dump(Node1, Tab), - [{1, v3}] = dump(Peer5, Tab). - -cets_ping_all_returns_when_ping_crashes(Config) -> - #{pid1 := Pid1, pid2 := Pid2} = given_two_joined_tables(Config), - meck:new(cets, [passthrough]), - meck:expect(cets_call, long_call, fun - (Server, ping) when Server == Pid2 -> error(simulate_crash); - (Server, Msg) -> meck:passthrough([Server, Msg]) - end), - ?assertMatch({error, [{Pid2, {'EXIT', {simulate_crash, _}}}]}, cets:ping_all(Pid1)), - meck:unload(). 
- -node_down_history_is_updated_when_netsplit_happens(Config) -> - %% node_down_history is available in cets:info/1 API. - %% It could be used for manual debugging in situations - %% we get netsplits or during rolling upgrades. - #{ct5 := Peer5} = proplists:get_value(peers, Config), - #{ct5 := Node5} = proplists:get_value(nodes, Config), - Node1 = node(), - Tab = make_name(Config), - {ok, Pid1} = start(Node1, Tab), - {ok, Pid5} = start(Peer5, Tab), - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid5), - block_node(Node5, Peer5), - try - F = fun() -> - History = maps:get(node_down_history, cets:info(Pid1)), - lists:map(fun(#{node := Node}) -> Node end, History) - end, - cets_test_wait:wait_until(F, [Node5]) - after - reconnect_node(Node5, Peer5), - cets:stop(Pid5) - end. - -cets_ping_non_existing_node(_Config) -> - pang = cets_ping:ping('mongooseim@non_existing_host'). - -pre_connect_fails_on_our_node(_Config) -> - cets_test_setup:mock_epmd(), - %% We would fail to connect to the remote EPMD but we would get an IP - pang = cets_ping:ping('mongooseim@resolvabletobadip'), - meck:unload(). - -pre_connect_fails_on_one_of_the_nodes(Config) -> - #{ct2 := Node2} = proplists:get_value(nodes, Config), - cets_test_setup:mock_epmd(), - %% We would get pong on Node2, but would fail an RPC to our hode - pang = rpc(Node2, cets_ping, ping, ['cetsnode1@localhost']), - History = meck:history(erl_epmd), - %% Check that Node2 called us - ?assertMatch( - [_], - [ - X - || {_, {erl_epmd, address_please, ["cetsnode1", "localhost", inet]}, - {ok, {192, 168, 100, 134}}} = X <- History - ], - History - ), - meck:unload(). - -cets_ping_net_family(_Config) -> - inet = cets_ping:net_family(error), - inet = cets_ping:net_family({ok, [["inet"]]}), - inet6 = cets_ping:net_family({ok, [["inet6"]]}), - inet6 = cets_ping:net_family({ok, [["inet6_tls"]]}). 
- -ping_pairs_returns_pongs(Config) -> - #{ct2 := Node2, ct3 := Node3} = proplists:get_value(nodes, Config), - Me = node(), - [{Me, Node2, pong}, {Node2, Node3, pong}] = - cets_ping:ping_pairs([{Me, Node2}, {Node2, Node3}]). - -ping_pairs_returns_earlier(Config) -> - #{ct2 := Node2, ct3 := Node3} = proplists:get_value(nodes, Config), - Me = node(), - Bad = 'badnode@localhost', - [{Me, Me, pong}, {Me, Node2, pong}, {Me, Bad, pang}, {Me, Node3, skipped}] = - cets_ping:ping_pairs([{Me, Me}, {Me, Node2}, {Me, Bad}, {Me, Node3}]). - %% Helper functions still_works(Pid) -> diff --git a/test/cets_netsplit_SUITE.erl b/test/cets_netsplit_SUITE.erl new file mode 100644 index 0000000..d20facb --- /dev/null +++ b/test/cets_netsplit_SUITE.erl @@ -0,0 +1,259 @@ +-module(cets_netsplit_SUITE). +-include_lib("common_test/include/ct.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include_lib("kernel/include/logger.hrl"). + +-compile([export_all, nowarn_export_all]). + +-import(cets_test_setup, [ + start/2, + start_local/1, + start_local/2, + start_disco/2, + start_simple_disco/0, + make_name/1, + make_name/2, + lock_name/1, + disco_name/1 +]). + +-import(cets_test_wait, [ + wait_for_down/1, + wait_for_ready/2, + wait_till_test_stage/2 +]). + +-import(cets_test_setup, [ + setup_two_nodes_and_discovery/1, + setup_two_nodes_and_discovery/2, + simulate_disco_restart/1, + make_signalling_process/0, + given_two_joined_tables/1 +]). + +-import(cets_test_wait, [ + wait_for_disco_timestamp_to_appear/3, + wait_for_disco_timestamp_to_be_updated/4 +]). + +-import(cets_test_receive, [ + receive_message/1, + flush_message/1 +]). + +-import(cets_test_peer, [ + block_node/2, + reconnect_node/2, + disconnect_node/2, + disconnect_node_by_name/2 +]). + +-import(cets_test_rpc, [ + rpc/4, + dump/2 +]). + +-import(cets_test_helper, [assert_unique/1]). + +-import(cets_test_rpc, [ + other_nodes/2 +]). + +all() -> + [ + {group, cets}, + {group, cets_seq}, + {group, cets_seq_no_log} + ]. 
+ +groups() -> + %% Cases should have unique names, because we name CETS servers based on case names + [ + {cets, [parallel, {repeat_until_any_fail, 3}], assert_unique(cases())}, + %% These tests actually simulate a netsplit on the distribution level. + %% Though, global's prevent_overlapping_partitions option starts kicking + %% all nodes from the cluster, so we have to be careful not to break other cases. + %% Setting prevent_overlapping_partitions=false on ct5 helps. + {cets_seq, [sequence, {repeat_until_any_fail, 2}], assert_unique(seq_cases())}, + {cets_seq_no_log, [sequence, {repeat_until_any_fail, 2}], + assert_unique(cets_seq_no_log_cases())} + ]. + +cases() -> + [ + cets_ping_non_existing_node, + cets_ping_net_family + ]. + +seq_cases() -> + [ + insert_returns_when_netsplit, + inserts_after_netsplit_reconnects, + cets_ping_all_returns_when_ping_crashes, + ping_pairs_returns_pongs, + ping_pairs_returns_earlier, + pre_connect_fails_on_our_node, + pre_connect_fails_on_one_of_the_nodes + ]. + +cets_seq_no_log_cases() -> + [ + node_down_history_is_updated_when_netsplit_happens + ]. + +init_per_suite(Config) -> + cets_test_setup:init_cleanup_table(), + cets_test_peer:start([ct2, ct3, ct5], Config). + +end_per_suite(Config) -> + cets_test_setup:remove_cleanup_table(), + cets_test_peer:stop(Config), + Config. + +init_per_group(Group, Config) when Group == cets_seq_no_log; Group == cets_no_log -> + [ok = logger:set_module_level(M, none) || M <- log_modules()], + Config; +init_per_group(_Group, Config) -> + Config. + +end_per_group(Group, Config) when Group == cets_seq_no_log; Group == cets_no_log -> + [ok = logger:unset_module_level(M) || M <- log_modules()], + Config; +end_per_group(_Group, Config) -> + Config. + +init_per_testcase(test_multinode_auto_discovery = Name, Config) -> + ct:make_priv_dir(), + init_per_testcase_generic(Name, Config); +init_per_testcase(Name, Config) -> + init_per_testcase_generic(Name, Config). 
+ +init_per_testcase_generic(Name, Config) -> + [{testcase, Name} | Config]. + +end_per_testcase(_, _Config) -> + cets_test_setup:wait_for_cleanup(), + ok. + +%% Modules that use a multiline LOG_ macro +log_modules() -> + [cets, cets_call, cets_long, cets_join, cets_discovery]. + +insert_returns_when_netsplit(Config) -> + #{ct5 := Peer5} = proplists:get_value(peers, Config), + #{ct5 := Node5} = proplists:get_value(nodes, Config), + Node1 = node(), + Tab = make_name(Config), + {ok, Pid1} = start(Node1, Tab), + {ok, Pid5} = start(Peer5, Tab), + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid5), + sys:suspend(Pid5), + R = cets:insert_request(Tab, {1, test}), + block_node(Node5, Peer5), + try + {reply, ok} = cets:wait_response(R, 5000) + after + reconnect_node(Node5, Peer5) + end. + +inserts_after_netsplit_reconnects(Config) -> + #{ct5 := Peer5} = proplists:get_value(peers, Config), + #{ct5 := Node5} = proplists:get_value(nodes, Config), + Node1 = node(), + Tab = make_name(Config), + {ok, Pid1} = start(Node1, Tab), + {ok, Pid5} = start(Peer5, Tab), + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid5), + sys:suspend(Pid5), + R = cets:insert_request(Tab, {1, v1}), + block_node(Node5, Peer5), + try + {reply, ok} = cets:wait_response(R, 5000) + after + reconnect_node(Node5, Peer5) + end, + sys:resume(Pid5), + cets:insert(Pid1, {1, v2}), + cets:insert(Pid5, {1, v3}), + %% No automatic recovery + [{1, v2}] = dump(Node1, Tab), + [{1, v3}] = dump(Peer5, Tab). + +cets_ping_all_returns_when_ping_crashes(Config) -> + #{pid1 := Pid1, pid2 := Pid2} = given_two_joined_tables(Config), + meck:new(cets, [passthrough]), + meck:expect(cets_call, long_call, fun + (Server, ping) when Server == Pid2 -> error(simulate_crash); + (Server, Msg) -> meck:passthrough([Server, Msg]) + end), + ?assertMatch({error, [{Pid2, {'EXIT', {simulate_crash, _}}}]}, cets:ping_all(Pid1)), + meck:unload(). 
+ +node_down_history_is_updated_when_netsplit_happens(Config) -> + %% node_down_history is available in cets:info/1 API. + %% It could be used for manual debugging in situations + %% we get netsplits or during rolling upgrades. + #{ct5 := Peer5} = proplists:get_value(peers, Config), + #{ct5 := Node5} = proplists:get_value(nodes, Config), + Node1 = node(), + Tab = make_name(Config), + {ok, Pid1} = start(Node1, Tab), + {ok, Pid5} = start(Peer5, Tab), + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid5), + block_node(Node5, Peer5), + try + F = fun() -> + History = maps:get(node_down_history, cets:info(Pid1)), + lists:map(fun(#{node := Node}) -> Node end, History) + end, + cets_test_wait:wait_until(F, [Node5]) + after + reconnect_node(Node5, Peer5), + cets:stop(Pid5) + end. + +cets_ping_non_existing_node(_Config) -> + pang = cets_ping:ping('mongooseim@non_existing_host'). + +pre_connect_fails_on_our_node(_Config) -> + cets_test_setup:mock_epmd(), + %% We would fail to connect to the remote EPMD but we would get an IP + pang = cets_ping:ping('mongooseim@resolvabletobadip'), + meck:unload(). + +pre_connect_fails_on_one_of_the_nodes(Config) -> + #{ct2 := Node2} = proplists:get_value(nodes, Config), + cets_test_setup:mock_epmd(), + %% We would get pong on Node2, but would fail an RPC to our hode + pang = rpc(Node2, cets_ping, ping, ['cetsnode1@localhost']), + History = meck:history(erl_epmd), + %% Check that Node2 called us + ?assertMatch( + [_], + [ + X + || {_, {erl_epmd, address_please, ["cetsnode1", "localhost", inet]}, + {ok, {192, 168, 100, 134}}} = X <- History + ], + History + ), + meck:unload(). + +cets_ping_net_family(_Config) -> + inet = cets_ping:net_family(error), + inet = cets_ping:net_family({ok, [["inet"]]}), + inet6 = cets_ping:net_family({ok, [["inet6"]]}), + inet6 = cets_ping:net_family({ok, [["inet6_tls"]]}). 
+ +ping_pairs_returns_pongs(Config) -> + #{ct2 := Node2, ct3 := Node3} = proplists:get_value(nodes, Config), + Me = node(), + [{Me, Node2, pong}, {Node2, Node3, pong}] = + cets_ping:ping_pairs([{Me, Node2}, {Node2, Node3}]). + +ping_pairs_returns_earlier(Config) -> + #{ct2 := Node2, ct3 := Node3} = proplists:get_value(nodes, Config), + Me = node(), + Bad = 'badnode@localhost', + [{Me, Me, pong}, {Me, Node2, pong}, {Me, Bad, pang}, {Me, Node3, skipped}] = + cets_ping:ping_pairs([{Me, Me}, {Me, Node2}, {Me, Bad}, {Me, Node3}]). From c19a3c4f1168d6ecafe012090dc3313febdfd59f Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Thu, 9 May 2024 21:59:43 +0200 Subject: [PATCH 21/30] Move start_link_local test --- test/cets_SUITE.erl | 13 +------------ test/cets_join_SUITE.erl | 15 +++++++++++---- test/cets_test_setup.erl | 12 ++++++++++++ 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 11e7b85..8e49f81 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -99,7 +99,6 @@ cases() -> delete_works, delete_many_works, inserted_records_could_be_read_back_from_replicated_table, - bag_with_conflict_handler_not_allowed_for_start_link, insert_new_works, insert_new_works_with_table_name, insert_new_works_when_leader_is_back, @@ -237,7 +236,7 @@ log_modules() -> start_link_inits_and_accepts_records(Config) -> Tab = make_name(Config), - start_link_local(Tab), + cets_test_setup:start_link_local(Tab), cets:insert(Tab, {alice, 32}), [{alice, 32}] = ets:lookup(Tab, alice). @@ -1447,16 +1446,6 @@ still_works(Pid) -> ok = cets:insert(Pid, {1}), {ok, [{1}]} = cets:remote_dump(Pid). -start_link_local(Name) -> - start_link_local(Name, #{}). - -start_link_local(Name, Opts) -> - catch cets:stop(Name), - wait_for_name_to_be_free(node(), Name), - {ok, Pid} = cets:start_link(Name, Opts), - schedule_cleanup(Pid), - {ok, Pid}. 
- stopped_pid() -> %% Get a pid for a stopped process {Pid, Mon} = spawn_monitor(fun() -> ok end), diff --git a/test/cets_join_SUITE.erl b/test/cets_join_SUITE.erl index 2f02212..ac44c62 100644 --- a/test/cets_join_SUITE.erl +++ b/test/cets_join_SUITE.erl @@ -99,10 +99,13 @@ groups() -> cases() -> [ join_works, + join_works_with_existing_data, + join_works_with_existing_data_with_conflicts, join_works_with_existing_data_with_conflicts_and_defined_conflict_handler, join_works_with_existing_data_with_conflicts_and_defined_conflict_handler_and_more_keys, join_works_with_existing_data_with_conflicts_and_defined_conflict_handler_and_keypos2, bag_with_conflict_handler_not_allowed, + bag_with_conflict_handler_not_allowed_for_start_link, join_with_the_same_pid, join_ref_is_same_after_join, join_fails_because_server_process_not_found, @@ -194,6 +197,14 @@ join_works_with_existing_data_with_conflicts(Config) -> [{alice, 33}] = ets:lookup(Tab1, alice), [{alice, 32}] = ets:lookup(Tab2, alice). +bag_with_conflict_handler_not_allowed(Config) -> + {error, [bag_with_conflict_handler]} = + cets:start(make_name(Config), #{handle_conflict => fun resolve_highest/2, type => bag}). + +bag_with_conflict_handler_not_allowed_for_start_link(Config) -> + {error, [bag_with_conflict_handler]} = + cets:start_link(make_name(Config), #{handle_conflict => fun resolve_highest/2, type => bag}). + join_works_with_existing_data_with_conflicts_and_defined_conflict_handler(Config) -> Opts = #{handle_conflict => fun resolve_highest/2}, Tab1 = make_name(Config, 1), @@ -251,10 +262,6 @@ resolve_user_conflict(_U1, U2) -> resolve_highest({K, A}, {K, B}) -> {K, max(A, B)}. -bag_with_conflict_handler_not_allowed(Config) -> - {error, [bag_with_conflict_handler]} = - cets:start(make_name(Config), #{handle_conflict => fun resolve_highest/2, type => bag}). 
- join_with_the_same_pid(Config) -> Tab = make_name(Config), {ok, Pid} = start_local(Tab), diff --git a/test/cets_test_setup.erl b/test/cets_test_setup.erl index 3decf1d..48715fe 100644 --- a/test/cets_test_setup.erl +++ b/test/cets_test_setup.erl @@ -13,6 +13,8 @@ -export([ start_local/1, start_local/2, + start_link_local/1, + start_link_local/2, start/2, start_disco/2, start_simple_disco/0 @@ -80,6 +82,16 @@ start(Node, Tab) -> schedule_cleanup(Pid), {ok, Pid}. +start_link_local(Name) -> + start_link_local(Name, #{}). + +start_link_local(Name, Opts) -> + catch cets:stop(Name), + cets_test_wait:wait_for_name_to_be_free(node(), Name), + {ok, Pid} = cets:start_link(Name, Opts), + schedule_cleanup(Pid), + {ok, Pid}. + start_disco(Node, Opts) -> case Opts of #{name := Name} -> From e3ad79cccd6c61a8bbed05149ba1afa2906ee597 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Thu, 9 May 2024 22:17:13 +0200 Subject: [PATCH 22/30] Debug tests --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f452c10..9e6b947 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: - name: Compile run: rebar3 as test compile - name: Run tests - run: rebar3 cover_tests + run: DEBUG=1 rebar3 cover_tests - name: Send test coverage report run: rebar3 as test codecov analyze - name: Upload coverage reports to Codecov From a211b72aeba53d9ff5fd6604e2f8562bcc176d3a Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Fri, 10 May 2024 13:15:32 +0200 Subject: [PATCH 23/30] Add 10 seconds timetrap --- test/app.config | 1 + test/cets_SUITE.erl | 3 +++ test/cets_disco_SUITE.erl | 3 +++ test/cets_join_SUITE.erl | 3 +++ test/cets_status_SUITE.erl | 3 +++ test/cets_test_setup.erl | 5 +++++ 6 files changed, 18 insertions(+) create mode 100644 test/app.config diff --git a/test/app.config b/test/app.config new file mode 100644 index 0000000..f468a9d --- /dev/null +++ 
b/test/app.config @@ -0,0 +1 @@ +[{timetrap, 1}]. diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index 8e49f81..f1f2534 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -65,6 +65,9 @@ set_join_ref/2 ]). +suite() -> + cets_test_setup:suite(). + all() -> [ {group, cets}, diff --git a/test/cets_disco_SUITE.erl b/test/cets_disco_SUITE.erl index 08b410d..4f3052d 100644 --- a/test/cets_disco_SUITE.erl +++ b/test/cets_disco_SUITE.erl @@ -56,6 +56,9 @@ other_nodes/2 ]). +suite() -> + cets_test_setup:suite(). + all() -> [ {group, cets}, diff --git a/test/cets_join_SUITE.erl b/test/cets_join_SUITE.erl index ac44c62..2815241 100644 --- a/test/cets_join_SUITE.erl +++ b/test/cets_join_SUITE.erl @@ -67,6 +67,9 @@ other_nodes/2 ]). +suite() -> + cets_test_setup:suite(). + all() -> [ {group, cets}, diff --git a/test/cets_status_SUITE.erl b/test/cets_status_SUITE.erl index a6bb648..01ce0a4 100644 --- a/test/cets_status_SUITE.erl +++ b/test/cets_status_SUITE.erl @@ -50,6 +50,9 @@ other_nodes/2 ]). +suite() -> + cets_test_setup:suite(). + all() -> [ {group, cets} diff --git a/test/cets_test_setup.erl b/test/cets_test_setup.erl index 48715fe..d6827a2 100644 --- a/test/cets_test_setup.erl +++ b/test/cets_test_setup.erl @@ -1,4 +1,6 @@ -module(cets_test_setup). +-export([suite/0]). + -export([ mock_epmd/0, mock_pause_on_remote_node_failing/0 @@ -51,6 +53,9 @@ -import(cets_test_rpc, [rpc/4]). +suite() -> + [{timetrap, {seconds, 10}}]. 
+ mock_epmd() -> meck:new(erl_epmd, [passthrough, unstick]), meck:expect(erl_epmd, address_please, fun From 07839a6989e4a76d4b608c37af8c08c96ea14d3f Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Fri, 10 May 2024 13:24:34 +0200 Subject: [PATCH 24/30] Use init:stop() to stop peers in tests --- test/cets_test_peer.erl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/cets_test_peer.erl b/test/cets_test_peer.erl index 632efbe..be33877 100644 --- a/test/cets_test_peer.erl +++ b/test/cets_test_peer.erl @@ -34,7 +34,10 @@ name(Node) -> start_node(Id) -> {ok, Peer, Node} = ?CT_PEER(#{ - name => name(Id), connection => standard_io, args => extra_args(Id) + name => name(Id), + connection => standard_io, + args => extra_args(Id), + shutdown => 3000 }), %% Register so we can find Peer process later in code register(node_to_peer_name(Node), Peer), From 0d98f7471dd94a5071c11adfbbab580849d90fcd Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Mon, 13 May 2024 17:19:11 +0200 Subject: [PATCH 25/30] Monitorr stopping peers --- test/cets_test_peer.erl | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/test/cets_test_peer.erl b/test/cets_test_peer.erl index be33877..26353e7 100644 --- a/test/cets_test_peer.erl +++ b/test/cets_test_peer.erl @@ -26,7 +26,14 @@ start(Names, Config) -> stop(Config) -> Peers = proplists:get_value(peers, Config), - [peer:stop(Peer) || Peer <- maps:values(Peers)], + [ + slow_task( + "peer:stop:self", + self(), + fun() -> slow_task("peer:stop", Peer, fun() -> peer:stop(Peer) end) end + ) + || Peer <- maps:values(Peers) + ], ok. name(Node) -> @@ -98,3 +105,18 @@ disconnect_node_by_name(Config, Id) -> lists:member(Node, nodes()) end, cets_test_wait:wait_until(F, false). + +slow_task(What, Self, F) -> + Pid = spawn_link(fun() -> monitor_loop(What, Self) end), + Res = F(), + Pid ! stop, + Res. 
+ +monitor_loop(What, Pid) -> + receive + stop -> + ok + after 1000 -> + ct:pal("monitor_loop ~p ~p", [What, erlang:process_info(Pid, current_stacktrace)]), + monitor_loop(What, Pid) + end. From b9908fc54b0dd0286f4b5c0668372263b3cd0a40 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Tue, 14 May 2024 17:37:20 +0200 Subject: [PATCH 26/30] Reuse nodes in cets_test_peer cover logic is broken in Erlang 26. So, peer:stop/1 should not be called --- .github/workflows/ci.yml | 2 +- test/app.config | 1 - test/cets_test_peer.erl | 47 ++++++++++++++++------------------------ 3 files changed, 20 insertions(+), 30 deletions(-) delete mode 100644 test/app.config diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9e6b947..f452c10 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: - name: Compile run: rebar3 as test compile - name: Run tests - run: DEBUG=1 rebar3 cover_tests + run: rebar3 cover_tests - name: Send test coverage report run: rebar3 as test codecov analyze - name: Upload coverage reports to Codecov diff --git a/test/app.config b/test/app.config deleted file mode 100644 index f468a9d..0000000 --- a/test/app.config +++ /dev/null @@ -1 +0,0 @@ -[{timetrap, 1}]. diff --git a/test/cets_test_peer.erl b/test/cets_test_peer.erl index 26353e7..d2fedba 100644 --- a/test/cets_test_peer.erl +++ b/test/cets_test_peer.erl @@ -17,7 +17,7 @@ -include_lib("common_test/include/ct.hrl"). start(Names, Config) -> - {Nodes, Peers} = lists:unzip([start_node(N) || N <- Names]), + {Nodes, Peers} = lists:unzip([find_or_start_node(N) || N <- Names]), [ {nodes, maps:from_list(lists:zip(Names, Nodes))}, {peers, maps:from_list(lists:zip(Names, Peers))} @@ -25,20 +25,27 @@ start(Names, Config) -> ]. stop(Config) -> - Peers = proplists:get_value(peers, Config), + %% peer:stop/1 freezes in the code cover logic. + %% So, we reuse nodes between different suites. + %% Ensure that the nodes are connected again. 
+ Nodes = proplists:get_value(nodes, Config), [ - slow_task( - "peer:stop:self", - self(), - fun() -> slow_task("peer:stop", Peer, fun() -> peer:stop(Peer) end) end - ) - || Peer <- maps:values(Peers) + reconnect_node(Node, node_to_peer(Node)) + || Node <- maps:values(Nodes) ], ok. name(Node) -> list_to_atom(peer:random_name(atom_to_list(Node))). +find_or_start_node(Id) -> + case persistent_term:get({id_to_node_peer, Id}, undefined) of + undefined -> + start_node(Id); + NodePeer -> + NodePeer + end. + start_node(Id) -> {ok, Peer, Node} = ?CT_PEER(#{ name => name(Id), @@ -47,7 +54,8 @@ start_node(Id) -> shutdown => 3000 }), %% Register so we can find Peer process later in code - register(node_to_peer_name(Node), Peer), + persistent_term:put({node_to_peer, Node}, Peer), + persistent_term:put({id_to_node_peer, Id}, {Node, Peer}), %% Keep nodes running after init_per_suite is finished unlink(Peer), %% Do RPC using alternative connection method @@ -60,16 +68,13 @@ node_to_peer(Node) when Node =:= node() -> %% There is no peer for the local CT node Node; node_to_peer(Node) when is_atom(Node) -> - case whereis(node_to_peer_name(Node)) of + case persistent_term:get({node_to_peer, Node}) of Pid when is_pid(Pid) -> Pid; undefined -> ct:fail({node_to_peer_failed, Node}) end. -node_to_peer_name(Node) -> - list_to_atom(atom_to_list(Node) ++ "_peer"). - %% Set epmd_port for better coverage extra_args(ct2) -> ["-epmd_port", "4369"]; @@ -88,6 +93,7 @@ block_node(Node, Peer) when is_atom(Node), is_pid(Peer) -> reconnect_node(Node, Peer) when is_atom(Node), is_pid(Peer) -> rpc(Peer, erlang, set_cookie, [node(), erlang:get_cookie()]), + erlang:set_cookie(Node, erlang:get_cookie()), %% Very rarely it could return pang cets_test_wait:wait_until(fun() -> rpc(Peer, net_adm, ping, [node()]) end, pong), cets_test_wait:wait_until(fun() -> rpc(node(), net_adm, ping, [Node]) end, pong). 
@@ -105,18 +111,3 @@ disconnect_node_by_name(Config, Id) -> lists:member(Node, nodes()) end, cets_test_wait:wait_until(F, false). - -slow_task(What, Self, F) -> - Pid = spawn_link(fun() -> monitor_loop(What, Self) end), - Res = F(), - Pid ! stop, - Res. - -monitor_loop(What, Pid) -> - receive - stop -> - ok - after 1000 -> - ct:pal("monitor_loop ~p ~p", [What, erlang:process_info(Pid, current_stacktrace)]), - monitor_loop(What, Pid) - end. From 6492e60ca5e151952c03bab0d42a4f335e917a63 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Thu, 16 May 2024 17:30:02 +0200 Subject: [PATCH 27/30] Remove unused imports --- test/cets_SUITE.erl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index f1f2534..b6a513a 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -13,8 +13,6 @@ -compile([export_all, nowarn_export_all]). -import(cets_test_peer, [ - block_node/2, - reconnect_node/2, disconnect_node/2 ]). @@ -40,14 +38,11 @@ given_two_joined_tables/1, given_two_joined_tables/2, given_3_servers/1, - given_3_servers/2, - given_n_servers/3, make_process/0 ]). -import(cets_test_wait, [ wait_for_down/1, - wait_for_ready/2, wait_for_unpaused/3, wait_for_join_ref_to_match/2, wait_till_test_stage/2, @@ -61,7 +56,6 @@ -import(cets_test_helper, [ assert_unique/1, - set_other_servers/2, set_join_ref/2 ]). 
From 5f6d9483b7132d60e110caecdd9b9d1b5132125c Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Thu, 16 May 2024 17:33:10 +0200 Subject: [PATCH 28/30] Move servers_remove_each_other_if_join_refs_do_not_match_after_unpause --- test/cets_SUITE.erl | 15 --------------- test/cets_join_SUITE.erl | 17 ++++++++++++++++- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/test/cets_SUITE.erl b/test/cets_SUITE.erl index b6a513a..3fa782f 100644 --- a/test/cets_SUITE.erl +++ b/test/cets_SUITE.erl @@ -116,7 +116,6 @@ cases() -> insert_serial_blocks_when_leader_is_not_back, leader_is_the_same_in_metadata_after_join, send_dump_contains_already_added_servers, - servers_remove_each_other_if_join_refs_do_not_match_after_unpause, test_multinode, test_multinode_remote_insert, node_list_is_correct, @@ -783,20 +782,6 @@ send_dump_contains_already_added_servers(Config) -> cets:unpause(Pid1, PauseRef), {ok, [{1}]} = cets:remote_dump(Pid1). -servers_remove_each_other_if_join_refs_do_not_match_after_unpause(Config) -> - {ok, Pid1} = start_local(make_name(Config, 1)), - {ok, Pid2} = start_local(make_name(Config, 2)), - %% cets:send_check_servers function is only called after all pauses are unpaused - PauseRef1 = cets:pause(Pid1), - PauseRef2 = cets:pause(Pid2), - ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{}), - %% send_check_servers is not called yet, because we are still pausing. - %% Mess with join_ref in the state. - set_join_ref(Pid1, make_ref()), - cets:unpause(Pid1, PauseRef1), - cets:unpause(Pid2, PauseRef2), - cets_test_wait:wait_until(fun() -> maps:get(other_servers, cets:info(Pid1)) end, []). - ignore_send_dump_received_when_paused_with_another_pause_ref(Config) -> ignore_send_dump_received_when_unpaused([{extra_pause, true} | Config]). 
diff --git a/test/cets_join_SUITE.erl b/test/cets_join_SUITE.erl index 2815241..99b835f 100644 --- a/test/cets_join_SUITE.erl +++ b/test/cets_join_SUITE.erl @@ -121,7 +121,8 @@ cases() -> join_fails_because_pids_do_not_match_for_nodes_in_segment, join_fails_because_servers_overlap, remote_ops_are_ignored_if_join_ref_does_not_match, - join_retried_if_lock_is_busy + join_retried_if_lock_is_busy, + servers_remove_each_other_if_join_refs_do_not_match_after_unpause ]. seq_cases() -> @@ -644,3 +645,17 @@ send_join_start_back_and_wait_for_continue_joining() -> (_) -> ok end. + +servers_remove_each_other_if_join_refs_do_not_match_after_unpause(Config) -> + {ok, Pid1} = start_local(make_name(Config, 1)), + {ok, Pid2} = start_local(make_name(Config, 2)), + %% cets:send_check_servers function is only called after all pauses are unpaused + PauseRef1 = cets:pause(Pid1), + PauseRef2 = cets:pause(Pid2), + ok = cets_join:join(lock_name(Config), #{}, Pid1, Pid2, #{}), + %% send_check_servers is not called yet, because we are still pausing. + %% Mess with join_ref in the state. + set_join_ref(Pid1, make_ref()), + cets:unpause(Pid1, PauseRef1), + cets:unpause(Pid2, PauseRef2), + cets_test_wait:wait_until(fun() -> maps:get(other_servers, cets:info(Pid1)) end, []). From 0209ac8c41bbbc5cb0248ca8b349f1af89ade93b Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Thu, 16 May 2024 17:42:15 +0200 Subject: [PATCH 29/30] Remove unused imports --- test/cets_join_SUITE.erl | 31 +++---------------------------- test/cets_netsplit_SUITE.erl | 31 ++----------------------------- test/cets_status_SUITE.erl | 19 +------------------ 3 files changed, 6 insertions(+), 75 deletions(-) diff --git a/test/cets_join_SUITE.erl b/test/cets_join_SUITE.erl index 99b835f..bfae46a 100644 --- a/test/cets_join_SUITE.erl +++ b/test/cets_join_SUITE.erl @@ -9,25 +9,12 @@ start/2, start_local/1, start_local/2, - start_disco/2, - start_simple_disco/0, make_name/1, make_name/2, - lock_name/1, - disco_name/1 -]). 
- --import(cets_test_wait, [ - wait_for_down/1, - wait_for_ready/2, - wait_till_test_stage/2 + lock_name/1 ]). -import(cets_test_setup, [ - setup_two_nodes_and_discovery/1, - setup_two_nodes_and_discovery/2, - simulate_disco_restart/1, - make_signalling_process/0, given_two_joined_tables/1, given_two_joined_tables/2, given_3_servers/1, @@ -35,22 +22,14 @@ given_n_servers/3 ]). --import(cets_test_wait, [ - wait_for_disco_timestamp_to_appear/3, - wait_for_disco_timestamp_to_be_updated/4 -]). - -import(cets_test_receive, [ receive_message/1, - receive_message_with_arg/1, - flush_message/1 + receive_message_with_arg/1 ]). -import(cets_test_peer, [ block_node/2, - reconnect_node/2, - disconnect_node/2, - disconnect_node_by_name/2 + reconnect_node/2 ]). -import(cets_test_rpc, [ @@ -63,10 +42,6 @@ assert_unique/1 ]). --import(cets_test_rpc, [ - other_nodes/2 -]). - suite() -> cets_test_setup:suite(). diff --git a/test/cets_netsplit_SUITE.erl b/test/cets_netsplit_SUITE.erl index d20facb..63687b1 100644 --- a/test/cets_netsplit_SUITE.erl +++ b/test/cets_netsplit_SUITE.erl @@ -7,45 +7,18 @@ -import(cets_test_setup, [ start/2, - start_local/1, - start_local/2, - start_disco/2, - start_simple_disco/0, make_name/1, make_name/2, - lock_name/1, - disco_name/1 -]). - --import(cets_test_wait, [ - wait_for_down/1, - wait_for_ready/2, - wait_till_test_stage/2 + lock_name/1 ]). -import(cets_test_setup, [ - setup_two_nodes_and_discovery/1, - setup_two_nodes_and_discovery/2, - simulate_disco_restart/1, - make_signalling_process/0, given_two_joined_tables/1 ]). --import(cets_test_wait, [ - wait_for_disco_timestamp_to_appear/3, - wait_for_disco_timestamp_to_be_updated/4 -]). - --import(cets_test_receive, [ - receive_message/1, - flush_message/1 -]). - -import(cets_test_peer, [ block_node/2, - reconnect_node/2, - disconnect_node/2, - disconnect_node_by_name/2 + reconnect_node/2 ]). 
-import(cets_test_rpc, [ diff --git a/test/cets_status_SUITE.erl b/test/cets_status_SUITE.erl index 01ce0a4..b5eeafe 100644 --- a/test/cets_status_SUITE.erl +++ b/test/cets_status_SUITE.erl @@ -7,8 +7,6 @@ -import(cets_test_setup, [ start/2, - start_local/1, - start_local/2, start_disco/2, make_name/1, make_name/2, @@ -26,19 +24,8 @@ simulate_disco_restart/1 ]). --import(cets_test_wait, [ - wait_for_disco_timestamp_to_appear/3, - wait_for_disco_timestamp_to_be_updated/4 -]). - -import(cets_test_receive, [ - receive_message/1, - flush_message/1 -]). - --import(cets_test_peer, [ - disconnect_node/2, - disconnect_node_by_name/2 + receive_message/1 ]). -import(cets_test_helper, [ @@ -46,10 +33,6 @@ set_other_servers/2 ]). --import(cets_test_rpc, [ - other_nodes/2 -]). - suite() -> cets_test_setup:suite(). From 06d99a8f579d44b070274ab510b3e9508eccd445 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Thu, 16 May 2024 17:44:57 +0200 Subject: [PATCH 30/30] Rename disconnect_node_by_name to disconnect_node_by_id --- test/cets_disco_SUITE.erl | 4 ++-- test/cets_test_peer.erl | 12 ++++++------ test/cets_test_setup.erl | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/test/cets_disco_SUITE.erl b/test/cets_disco_SUITE.erl index 4f3052d..98bf829 100644 --- a/test/cets_disco_SUITE.erl +++ b/test/cets_disco_SUITE.erl @@ -43,7 +43,7 @@ block_node/2, reconnect_node/2, disconnect_node/2, - disconnect_node_by_name/2 + disconnect_node_by_id/2 ]). -import(cets_test_rpc, [ @@ -636,7 +636,7 @@ disco_nodeup_timestamp_is_updated_after_node_reconnects(Config) -> Setup = setup_two_nodes_and_discovery(Config, [wait, disco2]), #{disco := Disco, node2 := Node2} = Setup, OldTimestamp = cets_test_helper:get_disco_timestamp(Disco, nodeup_timestamps, Node2), - disconnect_node_by_name(Config, ct2), + disconnect_node_by_id(Config, ct2), wait_for_disco_timestamp_to_be_updated(Disco, nodeup_timestamps, Node2, OldTimestamp). 
disco_node_start_timestamp_is_updated_after_node_restarts(Config) -> diff --git a/test/cets_test_peer.erl b/test/cets_test_peer.erl index d2fedba..ce83f0c 100644 --- a/test/cets_test_peer.erl +++ b/test/cets_test_peer.erl @@ -9,18 +9,18 @@ block_node/2, reconnect_node/2, disconnect_node/2, - disconnect_node_by_name/2 + disconnect_node_by_id/2 ]). -import(cets_test_rpc, [rpc/4]). -include_lib("common_test/include/ct.hrl"). -start(Names, Config) -> - {Nodes, Peers} = lists:unzip([find_or_start_node(N) || N <- Names]), +start(Ids, Config) -> + {Nodes, Peers} = lists:unzip([find_or_start_node(Id) || Id <- Ids]), [ - {nodes, maps:from_list(lists:zip(Names, Nodes))}, - {peers, maps:from_list(lists:zip(Names, Peers))} + {nodes, maps:from_list(lists:zip(Ids, Nodes))}, + {peers, maps:from_list(lists:zip(Ids, Peers))} | Config ]. @@ -101,7 +101,7 @@ reconnect_node(Node, Peer) when is_atom(Node), is_pid(Peer) -> disconnect_node(RPCNode, DisconnectNode) -> rpc(RPCNode, erlang, disconnect_node, [DisconnectNode]). -disconnect_node_by_name(Config, Id) -> +disconnect_node_by_id(Config, Id) -> Peer = maps:get(Id, proplists:get_value(peers, Config)), Node = maps:get(Id, proplists:get_value(nodes, Config)), %% We could need to retry to disconnect, if the local node is currently trying to establish a connection diff --git a/test/cets_test_setup.erl b/test/cets_test_setup.erl index d6827a2..e8c016c 100644 --- a/test/cets_test_setup.erl +++ b/test/cets_test_setup.erl @@ -48,7 +48,7 @@ -import(cets_test_peer, [ disconnect_node/2, - disconnect_node_by_name/2 + disconnect_node_by_id/2 ]). -import(cets_test_rpc, [rpc/4]). 
@@ -214,7 +214,7 @@ setup_two_nodes_and_discovery(Config, Flags) -> Node1 = node(), #{ct2 := Peer2} = proplists:get_value(peers, Config), #{ct2 := Node2} = proplists:get_value(nodes, Config), - disconnect_node_by_name(Config, ct2), + disconnect_node_by_id(Config, ct2), Tab = make_name(Config), {ok, _Pid1} = start(Node1, Tab), {ok, _Pid2} = start(Peer2, Tab), @@ -252,7 +252,7 @@ setup_two_nodes_and_discovery(Config, Flags) -> case lists:member(netsplit, Flags) of true -> %% Simulate a loss of connection between nodes - disconnect_node_by_name(Config, ct2); + disconnect_node_by_id(Config, ct2); false -> ok end,