Skip to content

Commit

Permalink
rebalancer: introduce rebalancer_mode
Browse files Browse the repository at this point in the history
Closes #432

@TarantoolBot document
Title: vshard: rebalancer flag and mode
So far it was impossible to specify which instance should run the
rebalancer. It was always automatically assigned using some
internal rules based on UUIDs.

Now the users can choose:
- Which specific instance should run the rebalancer. Can be a
    replica or a master - won't matter.
- In which replicaset the instance to run the rebalancer should be
    selected automatically.
- Which instances and whole replicasets should not run the
    rebalancer even when it is selected automatically.

For that there are 2 new options: `rebalancer = <bool>` and
`rebalancer_mode = <name>`.

The `rebalancer` flag can be either omitted, or set to true, or
false. It can be set for replicasets and for specific instances.
There can be only one `rebalancer = true` in the whole config, but
there can be many `rebalancer = false`.

* `rebalancer = true` assigned to an instance means that this
    instance is guaranteed to run the rebalancer service on it.
    The instance role doesn't matter - it can be a replica or a
    master. Will run the rebalancer anyway.

* `rebalancer = true` assigned to a replicaset means that the
    service will run only on the master of this replicaset. Can be
    combined with `master = 'auto'` on the given replicaset.

* `rebalancer = false` assigned to an instance means that it will
    not run the rebalancer.

* `rebalancer = false` assigned to a replicaset means that all the
    instances of this replicaset will not run the rebalancer.

* `rebalancer = nil` (same as omitted, default) means that the
    instance/replicaset will be eligible to run the rebalancer
    only if `rebalancer_mode = 'auto'` is set and there are no
    `rebalancer = true` anywhere.

The option `rebalancer_mode` should be specified in the root of
the config. It can have one of these values:

* `'auto'` - default. Means that the rebalancer service location
    is chosen automatically among all master instances in the
    cluster, excluding those which have `rebalancer = false` on
    them or on their replicaset. If there are any
    `rebalancer = true`, then this mode works the same as
    `'manual'`.

* `'manual'`. The rebalancer will run only if there is at least
    one `rebalancer = true` in the config. And only on the given
    instance / replicaset (depending on at which level the flag
    was specified - for a specific instance or for a whole
    replicaset).

* `'off'`. The rebalancer will not run anywhere, regardless of all
    the `rebalancer = true/false` specified in the config.
  • Loading branch information
Gerold103 committed Nov 15, 2023
1 parent 9f00584 commit feddae2
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 1 deletion.
59 changes: 59 additions & 0 deletions test/storage-luatest/rebalancer_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,62 @@ test_group.test_locate_with_flag = function(g)
vtest.cluster_cfg(g, global_cfg)
wait_rebalancer_on_instance(g, 'replica_1_a')
end

--
-- Walk the cluster through a sequence of rebalancer_mode / rebalancer flag
-- combinations and, after each reconfiguration, wait until the rebalancer is
-- reported on the expected instance (or on no instance when nil is expected).
--
test_group.test_rebalancer_mode = function(g)
    local template = table.deepcopy(cfg_template)
    local sharding = template.sharding

    -- Build a config from the current template state, apply it to the
    -- cluster and wait for the rebalancer on the given instance.
    local function apply_and_expect(instance_name)
        vtest.cluster_cfg(g, vtest.config_new(template))
        wait_rebalancer_on_instance(g, instance_name)
    end

    --
    -- Auto-mode won't ignore rebalancer flags. It can only make a difference
    -- when the rebalancer is not specified explicitly.
    --
    template.rebalancer_mode = 'auto'
    sharding[1].rebalancer = nil
    sharding[2].rebalancer = true
    apply_and_expect('replica_2_a')

    --
    -- The rebalancer false-flags are taken into account.
    --
    sharding[1].rebalancer = false
    sharding[2].rebalancer = false
    apply_and_expect('replica_3_a')

    --
    -- The flags don't matter when the rebalancer is off.
    --
    template.rebalancer_mode = 'off'
    apply_and_expect(nil)

    --
    -- Manual with a rebalancer assigned explicitly to an instance.
    --
    template.rebalancer_mode = 'manual'
    sharding[2].rebalancer = nil
    sharding[2].replicas.replica_2_b.rebalancer = true
    apply_and_expect('replica_2_b')

    --
    -- Manual with a rebalancer assigned explicitly to a replicaset.
    --
    template.rebalancer_mode = 'manual'
    sharding[2].replicas.replica_2_b.rebalancer = nil
    sharding[3].rebalancer = true
    apply_and_expect('replica_3_a')

    --
    -- Manual with no explicitly assigned rebalancer means no rebalancer at all.
    --
    sharding[3].rebalancer = nil
    apply_and_expect(nil)

    --
    -- Cleanup: return to the original global config.
    --
    vtest.cluster_cfg(g, global_cfg)
    wait_rebalancer_on_instance(g, 'replica_1_a')
end
39 changes: 39 additions & 0 deletions test/unit-luatest/config_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,42 @@ g.test_rebalancer_flag = function()
replicaset_1.rebalancer = nil
storage_1_a.rebalancer = nil
end

--
-- The config checker must accept every supported rebalancer_mode value
-- combined with each single placement of the rebalancer flag: omitted, set on
-- the instance, or set on the replicaset (true and false).
--
g.test_rebalancer_mode = function()
    local storage_1_a = {
        -- The scraped page had this literal mangled by e-mail obfuscation;
        -- it is a regular 'user:password@host:port' URI.
        uri = 'storage:storage@127.0.0.1:3301',
        name = 'storage_1_a',
    }
    local replicaset_1 = {
        replicas = {
            storage_1_a_uuid = storage_1_a,
        },
    }
    local config = {
        sharding = {
            storage_1_uuid = replicaset_1,
        },
    }
    -- No rebalancer_mode specified at all.
    t.assert(vcfg.check(config))

    -- Toggle the flag on the instance and on the replicaset one at a time,
    -- always resetting it to nil so that the next step starts clean.
    local function check_all_flag_combinations()
        t.assert(vcfg.check(config))
        storage_1_a.rebalancer = true
        t.assert(vcfg.check(config))
        storage_1_a.rebalancer = nil
        replicaset_1.rebalancer = true
        t.assert(vcfg.check(config))
        replicaset_1.rebalancer = false
        t.assert(vcfg.check(config))
        replicaset_1.rebalancer = nil
        storage_1_a.rebalancer = false
        t.assert(vcfg.check(config))
        storage_1_a.rebalancer = nil
    end

    for _, mode in ipairs({'auto', 'manual', 'off'}) do
        config.rebalancer_mode = mode
        check_all_flag_combinations()
    end
end
13 changes: 13 additions & 0 deletions vshard/cfg.lua
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,12 @@ local function check_discovery_mode(value)
end
end

--
-- Validate the rebalancer_mode option. Raises an error unless the value is
-- one of the strings 'auto', 'manual' or 'off'. Returns nothing on success.
--
local function check_rebalancer_mode(value)
    local is_known = value == 'auto' or value == 'manual' or value == 'off'
    if not is_known then
        error("Expected 'auto', 'manual', or 'off' for rebalancer_mode")
    end
end

local function check_sharding(sharding)
local uuids = {}
local uris = {}
Expand Down Expand Up @@ -319,6 +325,13 @@ local cfg_template = {
default = consts.DEFAULT_REBALANCER_MAX_SENDING,
max = consts.REBALANCER_MAX_SENDING_MAX
},
rebalancer_mode = {
type = 'string',
name = 'Rebalancer mode',
is_optional = true,
default = 'auto',
check = check_rebalancer_mode,
},
collect_bucket_garbage_interval = {
name = 'Garbage bucket collect interval', is_deprecated = true,
reason = 'Has no effect anymore'
Expand Down
10 changes: 9 additions & 1 deletion vshard/storage/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3238,8 +3238,10 @@ end
-- Find UUID of the instance which should run the rebalancer service.
--
local function rebalancer_cfg_find_instance(cfg)
assert(cfg.rebalancer_mode ~= 'off')
local target_uuid
local is_assigned
local is_auto = cfg.rebalancer_mode == 'auto'
for _, rs in pairs(cfg.sharding) do
if rs.rebalancer == false then
goto next_rs
Expand All @@ -3253,7 +3255,7 @@ local function rebalancer_cfg_find_instance(cfg)
end
local ok = true
ok = ok and not no_rebalancer
ok = ok and (replica.master or replica.rebalancer)
ok = ok and ((is_auto and replica.master) or replica.rebalancer)
ok = ok and (not target_uuid or replica_uuid < target_uuid)
ok = ok and (not is_assigned or is_rebalancer)
if ok then
Expand All @@ -3266,8 +3268,10 @@ local function rebalancer_cfg_find_instance(cfg)
end

local function rebalancer_cfg_find_replicaset(cfg)
assert(cfg.rebalancer_mode ~= 'off')
local target_uuid
local is_assigned
local is_auto = cfg.rebalancer_mode == 'auto'
for rs_uuid, rs in pairs(cfg.sharding) do
local is_rebalancer = rs.rebalancer
local no_rebalancer = rs.rebalancer == false
Expand All @@ -3278,6 +3282,7 @@ local function rebalancer_cfg_find_replicaset(cfg)
local ok = true
ok = ok and not no_rebalancer
ok = ok and (rs.master == 'auto')
ok = ok and (is_auto or is_rebalancer)
ok = ok and (not target_uuid or rs_uuid < target_uuid)
ok = ok and (not is_assigned or is_rebalancer)
if ok then
Expand All @@ -3293,6 +3298,9 @@ local function rebalancer_is_needed()
return false
end
local cfg = M.current_cfg
if cfg.rebalancer_mode == 'off' then
return false
end
local this_replica_uuid = M.this_replica.uuid
local this_replicaset_uuid = M.this_replicaset.uuid

Expand Down

0 comments on commit feddae2

Please sign in to comment.