From 8a93ff3d2d7a245bc7f6bfc787ee5d6e72f54d3d Mon Sep 17 00:00:00 2001 From: Charlie Voiselle <464492+angrycub@users.noreply.github.com> Date: Wed, 4 Oct 2023 12:20:27 -0400 Subject: [PATCH] [server] Directed leadership transfer CLI and API (#17383) * Add directed leadership transfer func * Add leadership transfer RPC endpoint * Add ACL tests for leadership-transfer endpoint * Add HTTP API route and implementation * Add to Go API client * Implement CLI command * Add documentation * Add changelog Co-authored-by: Tim Gross --- .changelog/17383.txt | 3 + api/operator.go | 49 ++++ command/agent/operator_endpoint.go | 56 ++++- command/agent/operator_endpoint_test.go | 140 ++++++++++++ command/commands.go | 5 + command/operator_raft_leadership_transfer.go | 125 +++++++++++ nomad/leader.go | 44 ++++ nomad/operator_endpoint.go | 128 ++++++++++- nomad/operator_endpoint_test.go | 212 ++++++++++++++++++ nomad/structs/operator.go | 59 +++++ website/content/api-docs/operator/raft.mdx | 150 +++++++++++-- .../operator/raft/transfer-leadership.mdx | 57 +++++ website/data/docs-nav-data.json | 4 + 13 files changed, 1008 insertions(+), 24 deletions(-) create mode 100644 .changelog/17383.txt create mode 100644 command/operator_raft_leadership_transfer.go create mode 100644 website/content/docs/commands/operator/raft/transfer-leadership.mdx diff --git a/.changelog/17383.txt b/.changelog/17383.txt new file mode 100644 index 00000000000..bfe3b594329 --- /dev/null +++ b/.changelog/17383.txt @@ -0,0 +1,3 @@ +```release-note:improvement +server: Added transfer-leadership API and CLI +``` diff --git a/api/operator.go b/api/operator.go index 32faf354661..507ea5cad2d 100644 --- a/api/operator.go +++ b/api/operator.go @@ -120,6 +120,46 @@ func (op *Operator) RaftRemovePeerByID(id string, q *WriteOptions) error { return nil } +// RaftTransferLeadershipByAddress is used to transfer leadership to a +// different peer using its address in the form of "IP:port". +func (op *Operator) RaftTransferLeadershipByAddress(address string, q *WriteOptions) error { + r, err := op.c.newRequest("PUT", "/v1/operator/raft/transfer-leadership") + if err != nil { + return err + } + r.setWriteOptions(q) + + r.params.Set("address", address) + + _, resp, err := requireOK(op.c.doRequest(r)) + if err != nil { + return err + } + + resp.Body.Close() + return nil +} + +// RaftTransferLeadershipByID is used to transfer leadership to a +// different peer using its Raft ID. +func (op *Operator) RaftTransferLeadershipByID(id string, q *WriteOptions) error { + r, err := op.c.newRequest("PUT", "/v1/operator/raft/transfer-leadership") + if err != nil { + return err + } + r.setWriteOptions(q) + + r.params.Set("id", id) + + _, resp, err := requireOK(op.c.doRequest(r)) + if err != nil { + return err + } + + resp.Body.Close() + return nil +} + // SchedulerConfiguration is the config for controlling scheduler behavior type SchedulerConfiguration struct { // SchedulerAlgorithm lets you select between available scheduling algorithms. @@ -363,3 +403,12 @@ func (op *Operator) LicenseGet(q *QueryOptions) (*LicenseReply, *QueryMeta, erro return &reply, qm, nil } + +type LeadershipTransferResponse struct { + From RaftServer + To RaftServer + Noop bool + Err error + + WriteMeta +} diff --git a/command/agent/operator_endpoint.go b/command/agent/operator_endpoint.go index 113ac56150d..c79641e02eb 100644 --- a/command/agent/operator_endpoint.go +++ b/command/agent/operator_endpoint.go @@ -30,6 +30,8 @@ func (s *HTTPServer) OperatorRequest(resp http.ResponseWriter, req *http.Request return s.OperatorRaftConfiguration(resp, req) case strings.HasPrefix(path, "peer"): return s.OperatorRaftPeer(resp, req) + case strings.HasPrefix(path, "transfer-leadership"): + return s.OperatorRaftTransferLeadership(resp, req) default: return nil, CodedError(404, ErrInvalidMethod) } @@ -56,8 +58,7 @@ func (s *HTTPServer) OperatorRaftConfiguration(resp http.ResponseWriter, req *ht return reply, nil } -// OperatorRaftPeer supports actions on Raft peers. Currently we only support -// removing peers by address. +// OperatorRaftPeer supports actions on Raft peers. func (s *HTTPServer) OperatorRaftPeer(resp http.ResponseWriter, req *http.Request) (interface{}, error) { if req.Method != http.MethodDelete { return nil, CodedError(404, ErrInvalidMethod) @@ -97,6 +98,57 @@ func (s *HTTPServer) OperatorRaftPeer(resp http.ResponseWriter, req *http.Reques return nil, nil } +// OperatorRaftTransferLeadership supports actions on Raft peers. +func (s *HTTPServer) OperatorRaftTransferLeadership(resp http.ResponseWriter, req *http.Request) (interface{}, error) { + if req.Method != http.MethodPost && req.Method != http.MethodPut { + return nil, CodedError(http.StatusMethodNotAllowed, ErrInvalidMethod) + } + + params := req.URL.Query() + + // Using the params map directly + id, hasID := params["id"] + addr, hasAddress := params["address"] + + // There are some items that we can parse for here that are more unwieldy in + // the Validate() func on the RPC request object, like repeated query params. + switch { + case !hasID && !hasAddress: + return nil, CodedError(http.StatusBadRequest, "must specify id or address") + case hasID && hasAddress: + return nil, CodedError(http.StatusBadRequest, "must specify either id or address") + case hasID && id[0] == "": + return nil, CodedError(http.StatusBadRequest, "id must be non-empty") + case hasID && len(id) > 1: + return nil, CodedError(http.StatusBadRequest, "must specify only one id") + case hasAddress && addr[0] == "": + return nil, CodedError(http.StatusBadRequest, "address must be non-empty") + case hasAddress && len(addr) > 1: + return nil, CodedError(http.StatusBadRequest, "must specify only one address") + } + + var out structs.LeadershipTransferResponse + args := &structs.RaftPeerRequest{} + s.parseWriteRequest(req, &args.WriteRequest) + + if hasID { + args.ID = raft.ServerID(id[0]) + } else { + args.Address = raft.ServerAddress(addr[0]) + } + + if err := args.Validate(); err != nil { + return nil, CodedError(http.StatusBadRequest, err.Error()) + } + + err := s.agent.RPC("Operator.TransferLeadershipToPeer", &args, &out) + if err != nil { + return nil, err + } + + return out, nil +} + // OperatorAutopilotConfiguration is used to inspect the current Autopilot configuration. // This supports the stale query mode in case the cluster doesn't have a leader. func (s *HTTPServer) OperatorAutopilotConfiguration(resp http.ResponseWriter, req *http.Request) (interface{}, error) { diff --git a/command/agent/operator_endpoint_test.go b/command/agent/operator_endpoint_test.go index 12535366c57..2d5590ffd06 100644 --- a/command/agent/operator_endpoint_test.go +++ b/command/agent/operator_endpoint_test.go @@ -20,6 +20,8 @@ import ( "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/shoenig/test/must" @@ -91,6 +93,144 @@ func TestHTTP_OperatorRaftPeer(t *testing.T) { }) } +func TestHTTP_OperatorRaftTransferLeadership(t *testing.T) { + ci.Parallel(t) + configCB := func(c *Config) { + c.Client.Enabled = false + c.Server.NumSchedulers = pointer.Of(0) + } + + httpTest(t, configCB, func(s *TestAgent) { + body := bytes.NewBuffer(nil) + badMethods := []string{ + http.MethodConnect, + http.MethodDelete, + http.MethodGet, + http.MethodHead, + http.MethodOptions, + http.MethodPatch, + http.MethodTrace, + } + for _, tc := range badMethods { + tc := tc + t.Run(tc+" method errors", func(t *testing.T) { + req, err := http.NewRequest(tc, "/v1/operator/raft/transfer-leadership?address=nope", body) + must.NoError(t, err) + + resp := httptest.NewRecorder() + _, err = s.Server.OperatorRaftTransferLeadership(resp, req) + + must.Error(t, err) + must.ErrorContains(t, err, "Invalid method") + body.Reset() + }) + } + + apiErrTCs := []struct { + name string + qs string + expected string + }{ + { + name: "URL with id and address errors", + qs: `?id=foo&address=bar`, + expected: "must specify either id or address", + }, + { + name: "URL without id and address errors", + qs: ``, + expected: "must specify id or address", + }, + { + name: "URL with multiple id errors", + qs: `?id=foo&id=bar`, + expected: "must specify only one id", + }, + { + name: "URL with multiple address errors", + qs: `?address=foo&address=bar`, + expected: "must specify only one address", + }, + { + name: "URL with an empty id errors", + qs: `?id`, + expected: "id must be non-empty", + }, + { + name: "URL with an empty address errors", + qs: `?address`, + expected: "address must be non-empty", + }, + { + name: "an invalid id errors", + qs: `?id=foo`, + expected: "id must be a uuid", + }, + { + name: "URL with an empty address errors", + qs: `?address=bar`, + expected: "address must be in IP:port format", + }, + } + for _, tc := range apiErrTCs { + tc := tc + t.Run(tc.name, func(t *testing.T) { + req, err := http.NewRequest( + http.MethodPut, + "/v1/operator/raft/transfer-leadership"+tc.qs, + body, + ) + must.NoError(t, err) + + resp := httptest.NewRecorder() + _, err = s.Server.OperatorRaftTransferLeadership(resp, req) + + must.Error(t, err) + must.ErrorContains(t, err, tc.expected) + body.Reset() + }) + } + }) + + testID := uuid.Generate() + apiOkTCs := []struct { + name string + qs string + expected string + }{ + { + "id", + "?id=" + testID, + `id "` + testID + `" was not found in the Raft configuration`, + }, + { + "address", + "?address=9.9.9.9:8000", + `address "9.9.9.9:8000" was not found in the Raft configuration`, + }, + } + for _, tc := range apiOkTCs { + tc := tc + t.Run(tc.name+" can roundtrip", func(t *testing.T) { + httpTest(t, configCB, func(s *TestAgent) { + body := bytes.NewBuffer(nil) + req, err := http.NewRequest( + http.MethodPut, + "/v1/operator/raft/transfer-leadership"+tc.qs, + body, + ) + must.NoError(t, err) + + // If we get this error, it proves we sent the parameter all the + // way through. + resp := httptest.NewRecorder() + _, err = s.Server.OperatorRaftTransferLeadership(resp, req) + must.ErrorContains(t, err, tc.expected) + }) + }) + } +} + func TestOperator_AutopilotGetConfiguration(t *testing.T) { ci.Parallel(t) httpTest(t, nil, func(s *TestAgent) { diff --git a/command/commands.go b/command/commands.go index f833b1847e2..d665c80e5e8 100644 --- a/command/commands.go +++ b/command/commands.go @@ -749,6 +749,11 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory { Meta: meta, }, nil }, + "operator raft transfer-leadership": func() (cli.Command, error) { + return &OperatorRaftTransferLeadershipCommand{ + Meta: meta, + }, nil + }, "operator raft info": func() (cli.Command, error) { return &OperatorRaftInfoCommand{ Meta: meta, diff --git a/command/operator_raft_leadership_transfer.go b/command/operator_raft_leadership_transfer.go new file mode 100644 index 00000000000..121c91574de --- /dev/null +++ b/command/operator_raft_leadership_transfer.go @@ -0,0 +1,125 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package command + +import ( + "fmt" + "strings" + + "github.com/hashicorp/nomad/api" + "github.com/posener/complete" +) + +type OperatorRaftTransferLeadershipCommand struct { + Meta +} + +func (c *OperatorRaftTransferLeadershipCommand) Help() string { + helpText := ` +Usage: nomad operator raft transfer-leadership [options] + + Transfer leadership to the Nomad server with given -peer-address or + -peer-id in the Raft configuration. All server nodes in the cluster + must be running at least Raft protocol v3 in order to use this command. + + There are cases where you might desire transferring leadership from one + cluster member to another, for example, during a rolling upgrade. This + command allows you to designate a new server to be cluster leader. + + Note: This command requires a currently established leader to function. + + If ACLs are enabled, this command requires a management token. + +General Options: + + ` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace) + ` + +Remove Peer Options: + + -peer-address="IP:port" + Transfer leadership to the Nomad server with given Raft address. + + -peer-id="id" + Transfer leadership to the Nomad server with given Raft ID. +` + + return strings.TrimSpace(helpText) +} + +func (c *OperatorRaftTransferLeadershipCommand) AutocompleteFlags() complete.Flags { + return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), + complete.Flags{ + "-peer-address": complete.PredictAnything, + "-peer-id": complete.PredictAnything, + }) +} + +func (c *OperatorRaftTransferLeadershipCommand) AutocompleteArgs() complete.Predictor { + return complete.PredictNothing +} + +func (c *OperatorRaftTransferLeadershipCommand) Synopsis() string { + return "Transfer leadership to a specified Nomad server" +} + +func (c *OperatorRaftTransferLeadershipCommand) Name() string { + return "operator raft transfer-leadership" +} + +func (c *OperatorRaftTransferLeadershipCommand) Run(args []string) int { + var peerAddress string + var peerID string + + flags := c.Meta.FlagSet("raft", FlagSetClient) + flags.Usage = func() { c.Ui.Output(c.Help()) } + + flags.StringVar(&peerAddress, "peer-address", "", "") + flags.StringVar(&peerID, "peer-id", "", "") + if err := flags.Parse(args); err != nil { + c.Ui.Error(fmt.Sprintf("Failed to parse args: %v", err)) + return 1 + } + + // Set up a client. + client, err := c.Meta.Client() + if err != nil { + c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) + return 1 + } + operator := client.Operator() + + if err := raftTransferLeadership(peerAddress, peerID, operator); err != nil { + c.Ui.Error(fmt.Sprintf("Error transferring leadership to peer: %v", err)) + return 1 + } + if peerAddress != "" { + c.Ui.Output(fmt.Sprintf("Transferred leadership to peer with address %q", peerAddress)) + } else { + c.Ui.Output(fmt.Sprintf("Transferred leadership to peer with id %q", peerID)) + } + + return 0 +} + +func raftTransferLeadership(address, id string, operator *api.Operator) error { + if len(address) == 0 && len(id) == 0 { + return fmt.Errorf("an address or id is required for the destination peer") + } + if len(address) > 0 && len(id) > 0 { + return fmt.Errorf("cannot give both an address and id") + } + + // Try to perform the leadership transfer. + if len(address) > 0 { + if err := operator.RaftTransferLeadershipByAddress(address, nil); err != nil { + return err + } + } else { + if err := operator.RaftTransferLeadershipByID(id, nil); err != nil { + return err + } + } + + return nil +} diff --git a/nomad/leader.go b/nomad/leader.go index 719160fa589..8d43d4ea9b2 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -149,6 +149,50 @@ func (s *Server) monitorLeadership() { } } +func (s *Server) leadershipTransferToServer(to structs.RaftIDAddress) error { + if l := structs.NewRaftIDAddress(s.raft.LeaderWithID()); l == to { + s.logger.Debug("leadership transfer to current leader is a no-op") + return nil + } + retryCount := 3 + var lastError error + for i := 0; i < retryCount; i++ { + err := s.raft.LeadershipTransferToServer(to.ID, to.Address).Error() + if err == nil { + s.logger.Info("successfully transferred leadership") + return nil + } + + // "cannot transfer leadership to itself" + // Handled at top of function, but reapplied here to prevent retrying if + // it occurs while we are retrying + if err.Error() == "cannot transfer leadership to itself" { + s.logger.Debug("leadership transfer to current leader is a no-op") + return nil + } + + // ErrRaftShutdown: Don't retry if raft is shut down. + if err == raft.ErrRaftShutdown { + return err + } + + // ErrUnsupportedProtocol: Don't retry if the Raft version doesn't + // support leadership transfer since this will never succeed. + if err == raft.ErrUnsupportedProtocol { + return fmt.Errorf("leadership transfer not supported with Raft version lower than 3") + } + + // ErrEnqueueTimeout: This seems to be the valid time to retry. + s.logger.Error("failed to transfer leadership attempt, will retry", + "attempt", i, + "retry_limit", retryCount, + "error", err, + ) + lastError = err + } + return fmt.Errorf("failed to transfer leadership in %d attempts. last error: %w", retryCount, lastError) +} + func (s *Server) leadershipTransfer() error { retryCount := 3 for i := 0; i < retryCount; i++ { diff --git a/nomad/operator_endpoint.go b/nomad/operator_endpoint.go index e6b018537e2..dd20c084636 100644 --- a/nomad/operator_endpoint.go +++ b/nomad/operator_endpoint.go @@ -8,6 +8,7 @@ import ( "fmt" "io" "net" + "net/http" "time" "github.com/hashicorp/go-hclog" @@ -124,7 +125,7 @@ func (op *Operator) RaftRemovePeerByAddress(args *structs.RaftPeerByAddressReque // Since this is an operation designed for humans to use, we will return // an error if the supplied address isn't among the peers since it's - // likely they screwed up. + // likely a mistake. { future := op.srv.raft.GetConfiguration() if err := future.Error(); err != nil { @@ -182,7 +183,7 @@ func (op *Operator) RaftRemovePeerByID(args *structs.RaftPeerByIDRequest, reply // Since this is an operation designed for humans to use, we will return // an error if the supplied id isn't among the peers since it's - // likely they screwed up. + // likely a mistake. var address raft.ServerAddress { future := op.srv.raft.GetConfiguration() @@ -228,6 +229,127 @@ REMOVE: return nil } +// TransferLeadershipToPeer is used to transfer leadership away from the +// current leader to a specific target peer. This can help prevent leadership +// flapping during a rolling upgrade by allowing the cluster operator to target +// an already upgraded node before upgrading the remainder of the cluster. +func (op *Operator) TransferLeadershipToPeer(req *structs.RaftPeerRequest, reply *structs.LeadershipTransferResponse) error { + // Populate the reply's `To` with the arguments. Only one of them is likely + // to be filled. We don't get any additional information until after auth + // to prevent leaking cluster details via the error response. + reply.To = structs.NewRaftIDAddress(req.Address, req.ID) + + authErr := op.srv.Authenticate(op.ctx, req) + + if done, err := op.srv.forward("Operator.TransferLeadershipToPeer", req, req, reply); done { + reply.Err = err + return reply.Err + } + op.srv.MeasureRPCRate("operator", structs.RateMetricWrite, req) + if authErr != nil { + reply.Err = structs.ErrPermissionDenied + return structs.ErrPermissionDenied + } + + // Check ACL permissions + if aclObj, err := op.srv.ResolveACL(req); err != nil { + return err + } else if aclObj != nil && !aclObj.IsManagement() { + reply.Err = structs.ErrPermissionDenied + return structs.ErrPermissionDenied + } + + // Technically, this code will be running on the leader because of the RPC + // forwarding, but a leadership change could happen at any moment while we're + // running. We need the leader's raft info to populate the response struct + // anyway, so we have a chance to check again here + + reply.From = structs.NewRaftIDAddress(op.srv.raft.LeaderWithID()) + + // If the leader information comes back empty, that signals that there is + // currently no leader. + if reply.From.Address == "" || reply.From.ID == "" { + reply.Err = structs.ErrNoLeader + return structs.NewErrRPCCoded(http.StatusServiceUnavailable, structs.ErrNoLeader.Error()) + } + + // while this is a somewhat more expensive test than later ones, if this + // test fails, they will _never_ be able to do a transfer. We do this after + // ACL checks though, so as to not leak cluster info to non-validated users. + minRaftProtocol, err := op.srv.MinRaftProtocol() + if err != nil { + reply.Err = err + return structs.NewErrRPCCoded(http.StatusInternalServerError, err.Error()) + } + + // TransferLeadership is not supported until Raft protocol v3 or greater. + if minRaftProtocol < 3 { + op.logger.Warn("unsupported minimum common raft protocol version", "required", "3", "current", minRaftProtocol) + reply.Err = errors.New("unsupported minimum common raft protocol version") + return structs.NewErrRPCCoded(http.StatusBadRequest, reply.Err.Error()) + } + + var kind, testedVal string + + // The request must provide either an ID or an Address, this lets us validate + // the request + req.Validate() + switch { + case req.ID != "": + kind, testedVal = "id", string(req.ID) + case req.Address != "": + kind, testedVal = "address", string(req.Address) + default: + reply.Err = errors.New("must provide peer id or address") + return structs.NewErrRPCCoded(http.StatusBadRequest, reply.Err.Error()) + } + + // Get the raft configuration + future := op.srv.raft.GetConfiguration() + if err := future.Error(); err != nil { + reply.Err = err + return err + } + + // Since this is an operation designed for humans to use, we will return + // an error if the supplied ID or address isn't among the peers since it's + // likely a mistake. + var found bool + for _, s := range future.Configuration().Servers { + if s.ID == req.ID || s.Address == req.Address { + reply.To = structs.NewRaftIDAddress(s.Address, s.ID) + found = true + break + } + } + + if !found { + reply.Err = fmt.Errorf("%s %q was not found in the Raft configuration", + kind, testedVal) + return structs.NewErrRPCCoded(http.StatusBadRequest, reply.Err.Error()) + } + + // Otherwise, this is a no-op, respond accordingly. + if reply.From == reply.To { + op.logger.Debug("leadership transfer to current leader is a no-op") + reply.Noop = true + return nil + } + + log := op.logger.With( + "to_peer_id", reply.To.ID, "to_peer_addr", reply.To.Address, + "from_peer_id", reply.From.ID, "from_peer_addr", reply.From.Address, + ) + if err = op.srv.leadershipTransferToServer(reply.To); err != nil { + reply.Err = err + log.Error("failed transferring leadership", "error", reply.Err.Error()) + return err + } + + log.Info("transferred leadership") + return nil +} + // AutopilotGetConfiguration is used to retrieve the current Autopilot configuration. func (op *Operator) AutopilotGetConfiguration(args *structs.GenericRequest, reply *structs.AutopilotConfig) error { @@ -284,7 +406,7 @@ func (op *Operator) AutopilotSetConfiguration(args *structs.AutopilotSetConfigRe return structs.ErrPermissionDenied } - // All servers should be at or above 0.8.0 to apply this operatation + // All servers should be at or above 0.8.0 to apply this operation if !ServersMeetMinimumVersion(op.srv.Members(), op.srv.Region(), minAutopilotVersion, false) { return fmt.Errorf("All servers should be running version %v to update autopilot config", minAutopilotVersion) } diff --git a/nomad/operator_endpoint_test.go b/nomad/operator_endpoint_test.go index fdd9a62a496..6899a1d0b48 100644 --- a/nomad/operator_endpoint_test.go +++ b/nomad/operator_endpoint_test.go @@ -9,6 +9,7 @@ import ( "fmt" "io" "net" + "net/rpc" "os" "path" "reflect" @@ -27,10 +28,16 @@ import ( "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" "github.com/hashicorp/raft" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +var ( + // RPC Permission Denied Errors - currently `rpc error: Permission denied` + rpcPermDeniedErr = rpc.ServerError(structs.ErrPermissionDenied.Error()) +) + func TestOperator_RaftGetConfiguration(t *testing.T) { ci.Parallel(t) @@ -368,6 +375,211 @@ func TestOperator_RaftRemovePeerByID_ACL(t *testing.T) { } } +type testcluster struct { + t *testing.T + server []*Server + cleanup []func() + token *structs.ACLToken + rpc func(string, any, any) error +} + +func (tc testcluster) Cleanup() { + for _, cFn := range tc.cleanup { + cFn() + } +} + +type tcArgs struct { + size int + enableACL bool +} + +func newTestCluster(t *testing.T, args tcArgs) (tc testcluster) { + // handle the zero case reasonably for count + if args.size == 0 { + args.size = 3 + } + if args.size < 1 { + t.Fatal("newTestCluster must have size greater than zero") + } + cSize := args.size + out := testcluster{ + t: t, + server: make([]*Server, cSize), + cleanup: make([]func(), cSize), + } + + for i := 0; i < cSize; i += 1 { + out.server[i], out.cleanup[i] = TestServer(t, func(c *Config) { + c.NodeName = fmt.Sprintf("node-%v", i+1) + c.RaftConfig.ProtocolVersion = raft.ProtocolVersion(3) + c.BootstrapExpect = cSize + c.ACLEnabled = args.enableACL + }) + } + t.Cleanup(out.Cleanup) + out.rpc = out.server[0].RPC + + TestJoin(t, out.server...) + out.WaitForLeader() + + if args.enableACL { + // Bootstrap the ACL subsystem + token := mock.ACLManagementToken() + err := out.server[0].State().BootstrapACLTokens(structs.MsgTypeTestSetup, 1, 0, token) + if err != nil { + t.Fatalf("failed to bootstrap ACL token: %v", err) + } + t.Logf("bootstrap token: %v", *token) + out.token = token + } + return out +} + +func (tc testcluster) WaitForLeader() { + testutil.WaitForLeader(tc.t, tc.rpc) +} + +func (tc testcluster) leader() *Server { + tc.WaitForLeader() + for _, s := range tc.server { + if isLeader, _ := s.getLeader(); isLeader { + return s + } + } + return nil +} + +func (tc testcluster) anyFollower() *Server { + if len(tc.server) < 2 { + return nil + } + + testutil.WaitForLeader(tc.t, tc.rpc) + for _, s := range tc.server { + if isLeader, _ := s.getLeader(); !isLeader { + return s + } + } + // something weird happened. + return nil +} + +func TestOperator_TransferLeadershipToServerAddress_ACL(t *testing.T) { + ci.Parallel(t) + + tc := newTestCluster(t, tcArgs{enableACL: true}) + s1 := tc.leader() + codec := rpcClient(t, s1) + state := s1.fsm.State() + + lAddr, _ := s1.raft.LeaderWithID() + + var addr raft.ServerAddress + // Find the first non-leader server in the list. + for a := range s1.localPeers { + addr = a + if addr != lAddr { + break + } + } + + // Create ACL token + invalidToken := mock.CreatePolicyAndToken(t, state, 1001, "test-invalid", mock.NodePolicy(acl.PolicyWrite)) + + arg := structs.RaftPeerRequest{ + RaftIDAddress: structs.RaftIDAddress{Address: addr}, + WriteRequest: structs.WriteRequest{Region: s1.config.Region}, + } + + var reply struct{} + + t.Run("no-token", func(t *testing.T) { + // Try with no token and expect permission denied + err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply) + must.Error(t, err) + must.ErrorIs(t, err, rpcPermDeniedErr) + }) + + t.Run("invalid-token", func(t *testing.T) { + // Try with an invalid token and expect permission denied + arg.AuthToken = invalidToken.SecretID + err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply) + must.Error(t, err) + must.ErrorIs(t, err, rpcPermDeniedErr) + }) + + t.Run("good-token", func(t *testing.T) { + // Try with a management token + arg.AuthToken = tc.token.SecretID + err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply) + must.NoError(t, err) + + // Is the expected leader the new one? + tc.WaitForLeader() + lAddrNew, _ := s1.raft.LeaderWithID() + must.Eq(t, addr, lAddrNew) + }) +} + +func TestOperator_TransferLeadershipToServerID_ACL(t *testing.T) { + ci.Parallel(t) + tc := newTestCluster(t, tcArgs{enableACL: true}) + s1 := tc.leader() + codec := rpcClient(t, s1) + state := s1.fsm.State() + + _, ldrID := s1.raft.LeaderWithID() + + var tgtID raft.ServerID + // Find the first non-leader server in the list. + for _, sp := range s1.localPeers { + tgtID = raft.ServerID(sp.ID) + if tgtID != ldrID { + break + } + } + + // Create ACL token + invalidToken := mock.CreatePolicyAndToken(t, state, 1001, "test-invalid", mock.NodePolicy(acl.PolicyWrite)) + + arg := structs.RaftPeerRequest{ + RaftIDAddress: structs.RaftIDAddress{ + ID: tgtID, + }, + WriteRequest: structs.WriteRequest{Region: s1.config.Region}, + } + + var reply struct{} + + t.Run("no-token", func(t *testing.T) { + // Try with no token and expect permission denied + err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply) + must.Error(t, err) + must.ErrorIs(t, err, rpcPermDeniedErr) + }) + + t.Run("invalid-token", func(t *testing.T) { + // Try with an invalid token and expect permission denied + arg.AuthToken = invalidToken.SecretID + err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply) + must.Error(t, err) + must.ErrorIs(t, err, rpcPermDeniedErr) + }) + + t.Run("good-token", func(t *testing.T) { + // Try with a management token + arg.AuthToken = tc.token.SecretID + err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply) + must.NoError(t, err) + + // Is the expected leader the new one? + tc.WaitForLeader() + _, ldrID := s1.raft.LeaderWithID() + must.Eq(t, tgtID, ldrID) + }) +} + func TestOperator_SchedulerGetConfiguration(t *testing.T) { ci.Parallel(t) diff --git a/nomad/structs/operator.go b/nomad/structs/operator.go index 70a3704d93f..c68bd668d90 100644 --- a/nomad/structs/operator.go +++ b/nomad/structs/operator.go @@ -4,9 +4,12 @@ package structs import ( + "errors" "fmt" + "net/netip" "time" + "github.com/hashicorp/go-uuid" "github.com/hashicorp/raft" ) @@ -49,6 +52,8 @@ type RaftConfigurationResponse struct { // RaftPeerByAddressRequest is used by the Operator endpoint to apply a Raft // operation on a specific Raft peer by address in the form of "IP:port". +// +// Deprecated: Use RaftPeerRequest with an Address instead. type RaftPeerByAddressRequest struct { // Address is the peer to remove, in the form "IP:port". Address raft.ServerAddress @@ -59,6 +64,8 @@ type RaftPeerByAddressRequest struct { // RaftPeerByIDRequest is used by the Operator endpoint to apply a Raft // operation on a specific Raft peer by ID. +// +// Deprecated: Use RaftPeerRequest with an ID instead. type RaftPeerByIDRequest struct { // ID is the peer ID to remove. ID raft.ServerID @@ -67,6 +74,58 @@ type RaftPeerByIDRequest struct { WriteRequest } +// RaftPeerRequest is used by the Operator endpoint to apply a Raft +// operation on a specific Raft peer by its peer ID or address in the form of +// "IP:port". +type RaftPeerRequest struct { + // RaftIDAddress contains an ID and Address field to identify the target + RaftIDAddress + // WriteRequest holds the Region for this request. + WriteRequest +} + +func (r *RaftPeerRequest) Validate() error { + if (r.ID == "" && r.Address == "") || (r.ID != "" && r.Address != "") { + return errors.New("either ID or Address must be set") + } + if r.ID != "" { + return r.validateID() + } + return r.validateAddress() +} + +func (r *RaftPeerRequest) validateID() error { + if _, err := uuid.ParseUUID(string(r.ID)); err != nil { + return fmt.Errorf("id must be a uuid: %w", err) + } + return nil +} + +func (r *RaftPeerRequest) validateAddress() error { + if _, err := netip.ParseAddrPort(string(r.Address)); err != nil { + return fmt.Errorf("address must be in IP:port format: %w", err) + } + return nil +} + +type LeadershipTransferResponse struct { + From RaftIDAddress // Server yielding leadership + To RaftIDAddress // Server obtaining leadership + Noop bool // Was the transfer a non-operation + Err error // Non-nil if there was an error while transferring leadership +} + +type RaftIDAddress struct { + Address raft.ServerAddress + ID raft.ServerID +} + +// NewRaftIDAddress takes parameters in the order provided by raft's +// LeaderWithID func and returns a RaftIDAddress +func NewRaftIDAddress(a raft.ServerAddress, id raft.ServerID) RaftIDAddress { + return RaftIDAddress{ID: id, Address: a} +} + // AutopilotSetConfigRequest is used by the Operator endpoint to update the // current Autopilot configuration of the cluster. type AutopilotSetConfigRequest struct { diff --git a/website/content/api-docs/operator/raft.mdx b/website/content/api-docs/operator/raft.mdx index b6313223f07..fb77f6d8e60 100644 --- a/website/content/api-docs/operator/raft.mdx +++ b/website/content/api-docs/operator/raft.mdx @@ -2,7 +2,7 @@ layout: api page_title: Raft - Operator - HTTP API description: |- - The /operator/raft endpoints provide tools for management of the Raft subsystem. + The /operator/raft endpoints provide tools for management of the Raft subsystem. --- # Raft Operator HTTP API @@ -34,26 +34,56 @@ The table below shows this endpoint's support for ### Sample Request + + + +```shell-session +$ nomad operator api /v1/operator/raft/configuration +``` + + + + ```shell-session $ curl \ https://localhost:4646/v1/operator/raft/configuration ``` + + + ### Sample Response ```json { - "Index": 1, - "Servers": [ - { - "Address": "127.0.0.1:4647", - "ID": "127.0.0.1:4647", - "Leader": true, - "Node": "bacon-mac.global", - "RaftProtocol": 2, - "Voter": true - } - ] + "Index": 0, + "Servers": [ + { + "Address": "10.1.0.10:4647", + "ID": "c13f9998-a0f3-d765-0b52-55a0b3ce5f88", + "Leader": false, + "Node": "node1.global", + "RaftProtocol": "3", + "Voter": true + }, + { + "Address": "10.1.0.20:4647", + "ID": "d7927f2b-067f-45a4-6266-af8bb84de082", + "Leader": true, + "Node": "node2.global", + "RaftProtocol": "3", + "Voter": true + }, + { + "Address": "10.1.0.30:4647", + "ID": "00d56ef8-938e-abc3-6f8a-f8ac80a80fb9", + "Leader": false, + "Node": "node3.global", + "RaftProtocol": "3", + "Voter": true + } + + ] } ``` @@ -66,8 +96,8 @@ $ curl \ - `Servers` `(array: Server)` - The returned `Servers` array has information about the servers in the Raft peer configuration. - - `ID` `(string)` - The ID of the server. This is the same as the `Address` - but may be upgraded to a GUID in a future version of Nomad. + - `ID` `(string)` - The ID of the server. For Raft protocol v2, this is the + same as the `Address`. Raft protocol v3 uses GUIDs as the ID. - `Node` `(string)` - The node name of the server, as known to Nomad, or `"(unknown)"` if the node is stale and not known. @@ -100,18 +130,100 @@ The table below shows this endpoint's support for ### Parameters -- `address` `(string: )` - Specifies the server to remove as - `ip:port`. This cannot be provided along with the `id` parameter. +- `address` `(string: )` - Specifies the Raft **Address** of the + server to remove as provided in the output of `/v1/operator/raft/configuration` + API endpoint or the `nomad operator raft list-peers` command. -- `id` `(string: )` - Specifies the server to remove as - `id`. This cannot be provided along with the `address` parameter. +- `id` `(string: )` - Specifies the Raft **ID** of the server to + remove as provided in the output of `/v1/operator/raft/configuration` + API endpoint or the `nomad operator raft list-peers` command. + + + + Either `address` or `id` must be provided, but not both. + + ### Sample Request + + + + +```shell-session +$ nomad operator api -X DELETE \ + /v1/operator/raft/peer?address=1.2.3.4:4647 +``` + + + + ```shell-session $ curl \ --request DELETE \ - https://localhost:4646/v1/operator/raft/peer?address=1.2.3.4:4646 + --header "X-Nomad-Token: ${NOMAD_TOKEN}" + https://127.0.0.1:4646/v1/operator/raft/peer?address=1.2.3.4:4647 +``` + + + + +## Transfer Leadership to another Raft Peer + +This endpoint tells the current cluster leader to transfer leadership +to the Nomad server with given address or ID in the Raft +configuration. The return code signifies success or failure. + +| Method | Path | Produces | +| ------------------- | --------------------------------------- | ------------------ | +| `PUT`
`POST` | `/v1/operator/raft/transfer-leadership` | `application/json` | + +The table below shows this endpoint's support for +[blocking queries](/nomad/api-docs#blocking-queries) and +[required ACLs](/nomad/api-docs#acls). + +| Blocking Queries | ACL Required | +| ---------------- | ------------ | +| `NO` | `management` | + +### Parameters + +- `address` `(string: )` - Specifies the Raft **Address** of the + target server as provided in the output of `/v1/operator/raft/configuration` + API endpoint or the `nomad operator raft list-peers` command. + +- `id` `(string: )` - Specifies the Raft **ID** of the target server + as provided in the output of `/v1/operator/raft/configuration` API endpoint or + the `nomad operator raft list-peers` command. + + + +- The cluster must be running Raft protocol v3 or greater on all server members. + +- Either `address` or `id` must be provided, but not both. + + + +### Sample Requests + + + + +```shell-session +$ nomad operator api -X PUT \ + "/v1/operator/raft/transfer-leadership?address=1.2.3.4:4647" ``` + + + +```shell-session +$ curl --request PUT \ + --header "X-Nomad-Token: ${NOMAD_TOKEN}" + "https://127.0.0.1:4646/v1/operator/raft/transfer-leadership?address=1.2.3.4:4647" +``` + + + + [consensus protocol guide]: /nomad/docs/concepts/consensus diff --git a/website/content/docs/commands/operator/raft/transfer-leadership.mdx b/website/content/docs/commands/operator/raft/transfer-leadership.mdx new file mode 100644 index 00000000000..0520b530b9b --- /dev/null +++ b/website/content/docs/commands/operator/raft/transfer-leadership.mdx @@ -0,0 +1,57 @@ +--- +layout: docs +page_title: 'Commands: operator raft transfer-leadership' +description: | + Transfer leadership to a specific a Nomad server. +--- + +# Command: operator raft transfer-leadership + +Transfer leadership from the current leader to the given server member. + +While performing a [rolling upgrade][] of your Nomad cluster, it might be +advisable to transfer leadership to a specific node in the cluster. For example, +setting the leader to the first upgraded server in the cluster can prevent +leadership churn as you upgrade the remaining server nodes. + +The target server's ID or address:port are required and can be obtained by +running the [`nomad operator raft list-peers`][] command or by calling the +[Read Raft Configuration][] API endpoint. + +For an API to perform these operations programmatically, please see the +documentation for the [Operator][] endpoint. + +## Usage + +```plaintext +nomad operator raft transfer-leadership [options] +``` + + +If ACLs are enabled, this command requires a management token. + + +## General Options + +@include 'general_options_no_namespace.mdx' + +## Transfer Leadership Options + +- `-peer-address`: Specifies the Raft **Address** of the target server as + provided in the output of the [`nomad operator raft list-peers`][] command or + the [Read Raft Configuration] API endpoint. + +- `-peer-id`: Specifies the Raft **ID** of the target server as provided in the + output of the [`nomad operator raft list-peers`][] command or the + [Read Raft Configuration] API endpoint. + + + + Either `-peer-address` or `-peer-id` must be provided, but not both. + + + +[`nomad operator raft list-peers`]: /nomad/docs/commands/operator/raft/list-peers 'Nomad operator raft list-peers command' +[operator]: /nomad/api-docs/operator 'Nomad Operator API' +[rolling upgrade]: /nomad/docs/upgrade#upgrade-process +[Read Raft Configuration]: /nomad/api-docs/operator/raft#read-raft-configuration diff --git a/website/data/docs-nav-data.json b/website/data/docs-nav-data.json index 5a1acc2a430..25afbf5482e 100644 --- a/website/data/docs-nav-data.json +++ b/website/data/docs-nav-data.json @@ -800,6 +800,10 @@ { "title": "state", "path": "commands/operator/raft/state" + }, + { + "title": "transfer-leadership", + "path": "commands/operator/raft/transfer-leadership" } ] },