From f4395f577054fe7a06e543198abf3eeae6be93ad Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Tue, 9 Apr 2024 15:44:22 +0300 Subject: [PATCH] fix: prevent replicants from joining core nodes that disabled node discovery A core node may leave a cluster and disable discovery. In this case, replicants must ignore (i.e. not connect to) this node. --- src/mria_config.erl | 16 ++++++---------- src/mria_lb.erl | 12 +++++++++++- test/mria_lb_SUITE.erl | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/src/mria_config.erl b/src/mria_config.erl index a41f599..0777356 100644 --- a/src/mria_config.erl +++ b/src/mria_config.erl @@ -252,21 +252,17 @@ set_extra_mnesia_diagnostic_checks(Checks) when is_list(Checks) -> get_extra_mnesia_diagnostic_checks() -> persistent_term:get(?mria(extra_mnesia_diagnostic_checks), []). +%% When `core_node_discovery = false` on a replicant node, +%% the replicant will stop discovering core nodes. +%% When it is `false` on a core node, replicants will not connect to that core node. +%% See mria_lb for more details. -spec set_core_node_discovery(boolean()) -> ok. set_core_node_discovery(What) when What =:= true; What =:= false -> - case role() of - core -> - ok; - replicant -> - ok = application:set_env(mria, core_node_discovery, What) - end. + ok = application:set_env(mria, core_node_discovery, What). -spec is_core_node_discovery_enabled() -> boolean(). is_core_node_discovery_enabled() -> - case role() of - core -> false; - replicant -> application:get_env(mria, core_node_discovery, true) - end. + application:get_env(mria, core_node_discovery, true). 
%%================================================================================ %% Internal diff --git a/src/mria_lb.erl b/src/mria_lb.erl index 7f73e70..8ac1aca 100644 --- a/src/mria_lb.erl +++ b/src/mria_lb.erl @@ -61,6 +61,9 @@ , db_nodes => [node()] , shard_badness => [{mria_rlog:shard(), float()}] , custom_info => _ + %% We can't prevent core node discovery if the node is still up and reachable. + %% Thus, a replicant can discover a core but must ignore it if discovery is disabled. + , discovery_enabled => boolean() }. -define(update, update). @@ -153,7 +156,13 @@ do_update(State = #s{core_nodes = OldCoreNodes, node_info = OldNodeInfo}) -> , ?MODULE, lb_callback, [] , mria_config:lb_timeout() ), - NodeInfo1 = [I || I = {_, #{whoami := core, running := true}} <- NodeInfo0], + NodeInfo1 = lists:filter(fun({_, #{whoami := Who, running := IsRunning} = I}) -> + %% Backward compatibility + IsDiscoverable = maps:get(discovery_enabled, I, true), + IsRunning andalso IsDiscoverable andalso Who =:= core + end, + NodeInfo0), + NodeInfo = maps:from_list(NodeInfo1), maybe_report_changes(OldNodeInfo, NodeInfo), %% Find partitions of the core cluster, and if the core cluster is @@ -324,6 +333,7 @@ lb_callback() -> , whoami => Whoami , protocol_version => mria_rlog:get_protocol_version() , custom_info => CustomInfo + , discovery_enabled => mria_config:is_core_node_discovery_enabled() }, MoreInfo = case Whoami of diff --git a/test/mria_lb_SUITE.erl b/test/mria_lb_SUITE.erl index f76d0f4..fff337d 100644 --- a/test/mria_lb_SUITE.erl +++ b/test/mria_lb_SUITE.erl @@ -234,6 +234,40 @@ t_core_node_leave(_Config) -> mria_ct:teardown_cluster(Cluster) end, []). +%% Check that removing a node from the cluster and disabling its rediscovery is handled correctly by the LB. 
+t_node_leave_disable_discovery(_Config) -> + Cluster = mria_ct:cluster([core, core, replicant], mria_mnesia_test_util:common_env()), + ?check_trace( + #{timetrap => 60000}, + try + {[C1, C2, R1], {ok, _}} = + ?wait_async_action( + begin + Nodes = [_, _, R1] = mria_ct:start_cluster(mria, Cluster), + mria_mnesia_test_util:wait_full_replication(Cluster, 5000), + {R1, mria_lb} ! update, + Nodes + end, + #{ ?snk_kind := mria_lb_core_discovery_new_nodes + , returned_cores := [_, _] + }, 10000), + %% Disable discovery and kick C2 from the cluster: + ?wait_async_action( + begin + erpc:call(C2, fun() -> ok = mria_config:set_core_node_discovery(false), + mria:leave() + end) + end, + #{ ?snk_kind := mria_lb_core_discovery_new_nodes + , node := _ + , previous_cores := [_, _] + , returned_cores := [_] + }, 10000), + ?assertEqual([C1], rpc:call(R1, mria_rlog, core_nodes, [])) + after + mria_ct:teardown_cluster(Cluster) + end, []). + t_custom_compat_check(_Config) -> Env = [ {mria, {callback, lb_custom_info_check}, fun(Val) -> Val =:= chosen_one end} | mria_mnesia_test_util:common_env()],