Skip to content

Commit

Permalink
Merge pull request #178 from SergeTupchiy/EMQX-11826-prevent-replican…
Browse files Browse the repository at this point in the history
…ts-from-rejoining-a-left-core

fix: prevent replicants from joining core nodes that disabled node discovery
  • Loading branch information
SergeTupchiy authored Apr 9, 2024
2 parents 401424d + f4395f5 commit 21c5954
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 11 deletions.
16 changes: 6 additions & 10 deletions src/mria_config.erl
Original file line number Diff line number Diff line change
Expand Up @@ -252,21 +252,17 @@ set_extra_mnesia_diagnostic_checks(Checks) when is_list(Checks) ->
get_extra_mnesia_diagnostic_checks() ->
persistent_term:get(?mria(extra_mnesia_diagnostic_checks), []).

%% When `core_node_discovery = false` on a replicant node,
%% the replicant will stop discovering core nodes.
%% When it is `false` on a core node, other replicants will not connect to that core node.
%% See mria_lb for more details.
%% NOTE(review): this text is a rendered diff hunk without +/- markers, so two
%% alternative bodies of the function appear back to back below. Presumably the
%% `case role() of ... end.` part is the pre-change body and the final line is
%% the post-change body -- confirm against the repository before reusing it.
-spec set_core_node_discovery(boolean()) -> ok.
set_core_node_discovery(What) when What =:= true; What =:= false ->
%% Role-dependent variant: a core node ignores the request and returns `ok`;
%% only a replicant stores the flag in the `mria` application environment.
case role() of
core ->
ok;
replicant ->
ok = application:set_env(mria, core_node_discovery, What)
end.
%% Role-independent variant: the flag is stored in the `mria` application
%% environment on any node; the match on `ok` crashes if set_env returns
%% anything else.
ok = application:set_env(mria, core_node_discovery, What).

%% Reports whether core node discovery is enabled on the local node.
%%
%% NOTE(review): rendered diff hunk without +/- markers. Presumably the
%% `case role() of ... end.` part is the pre-change body and the final line is
%% the post-change body -- confirm against the repository before reusing it.
-spec is_core_node_discovery_enabled() -> boolean().
is_core_node_discovery_enabled() ->
%% Role-dependent variant: always `false` on a core node; on a replicant the
%% `core_node_discovery` application env value is read, defaulting to `true`.
case role() of
core -> false;
replicant -> application:get_env(mria, core_node_discovery, true)
end.
%% Role-independent variant: read the `core_node_discovery` value from the
%% `mria` application environment, defaulting to `true` when it is unset.
application:get_env(mria, core_node_discovery, true).

%%================================================================================
%% Internal
Expand Down
12 changes: 11 additions & 1 deletion src/mria_lb.erl
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@
, db_nodes => [node()]
, shard_badness => [{mria_rlog:shard(), float()}]
, custom_info => _
%% We can't prevent core node discovery if the node is still up and reachable.
%% Thus, a replicant can discover a core but must ignore it if discovery is disabled.
, discovery_enabled => boolean()
}.

-define(update, update).
Expand Down Expand Up @@ -153,7 +156,13 @@ do_update(State = #s{core_nodes = OldCoreNodes, node_info = OldNodeInfo}) ->
, ?MODULE, lb_callback, []
, mria_config:lb_timeout()
),
NodeInfo1 = [I || I = {_, #{whoami := core, running := true}} <- NodeInfo0],
NodeInfo1 = lists:filter(fun({_, #{whoami := Who, running := IsRunning} = I}) ->
%% Backward compatibility
IsDiscoverable = maps:get(discovery_enabled, I, true),
IsRunning andalso IsDiscoverable andalso Who =:= core
end,
NodeInfo0),

NodeInfo = maps:from_list(NodeInfo1),
maybe_report_changes(OldNodeInfo, NodeInfo),
%% Find partitions of the core cluster, and if the core cluster is
Expand Down Expand Up @@ -324,6 +333,7 @@ lb_callback() ->
, whoami => Whoami
, protocol_version => mria_rlog:get_protocol_version()
, custom_info => CustomInfo
, discovery_enabled => mria_config:is_core_node_discovery_enabled()
},
MoreInfo =
case Whoami of
Expand Down
34 changes: 34 additions & 0 deletions test/mria_lb_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,40 @@ t_core_node_leave(_Config) ->
mria_ct:teardown_cluster(Cluster)
end, []).

%% Check that removing a node from the cluster and disabling its rediscovery is handled correctly by the LB.
%%
%% Scenario:
%%  1. Start a cluster of two cores and one replicant and wait until the
%%     replicant's mria_lb reports two core nodes.
%%  2. On the second core, disable core node discovery and leave the cluster.
%%  3. Wait until mria_lb reports a change from two cores to one, then verify
%%     that the replicant only knows about the first core.
t_node_leave_disable_discovery(_Config) ->
%% Cluster layout: two core nodes followed by one replicant.
Cluster = mria_ct:cluster([core, core, replicant], mria_mnesia_test_util:common_env()),
?check_trace(
#{timetrap => 60000},
try
%% Phase 1: bring the cluster up. The action returns the node list, which
%% is destructured here together with the `{ok, _}` result of the wait.
{[C1, C2, R1], {ok, _}} =
?wait_async_action(
begin
Nodes = [_, _, R1] = mria_ct:start_cluster(mria, Cluster),
mria_mnesia_test_util:wait_full_replication(Cluster, 5000),
%% Send `update` to the process registered as `mria_lb` on the replicant.
{R1, mria_lb} ! update,
Nodes
end,
%% Expected event: the LB returned exactly two cores.
#{ ?snk_kind := mria_lb_core_discovery_new_nodes
, returned_cores := [_, _]
}, 10000),
%% Disable discovery and kick C2 from the cluster:
?wait_async_action(
begin
%% Both calls run on C2 itself: discovery is switched off first, then the
%% node leaves the cluster.
erpc:call(C2, fun() -> ok = mria_config:set_core_node_discovery(false),
mria:leave()
end)
end,
%% Expected event: the core list shrinks from two entries to one.
#{ ?snk_kind := mria_lb_core_discovery_new_nodes
, node := _
, previous_cores := [_, _]
, returned_cores := [_]
}, 10000),
%% The replicant must now list C1 as its only core node.
?assertEqual([C1], rpc:call(R1, mria_rlog, core_nodes, []))
after
%% Tear the cluster down whether or not the assertions above succeeded.
mria_ct:teardown_cluster(Cluster)
end, []).

t_custom_compat_check(_Config) ->
Env = [ {mria, {callback, lb_custom_info_check}, fun(Val) -> Val =:= chosen_one end}
| mria_mnesia_test_util:common_env()],
Expand Down

0 comments on commit 21c5954

Please sign in to comment.