From dd36bc03435095f43acc4c595d42bd023104734b Mon Sep 17 00:00:00 2001 From: justinsb Date: Sat, 9 Nov 2024 11:30:48 -0500 Subject: [PATCH] Introduce --reconcile flag to kOps Kubernetes 1.31 now stops nodes joining a cluster if the minor version of the node is greater than the minor version of the control plane. The addition of the instance-group-roles flag to update means that we can now update / rolling-update the control plane first. However, we must now issue four commands: * Update control plane * Rolling update control plane * Update nodes * Rolling update nodes This adds a flag to automate this process. It is implemented by executing those 4 steps in sequence. Update is also smart enough to not update the nodes if this would violate the skew policy, but we do this explicitly in the reconcile command to be clearer and safer. --- cmd/kops/reconcile_cluster.go | 86 +++++++++++++++++++++++++++++++++ cmd/kops/update_cluster.go | 14 ++++++ docs/cli/kops_update_cluster.md | 1 + 3 files changed, 101 insertions(+) create mode 100644 cmd/kops/reconcile_cluster.go diff --git a/cmd/kops/reconcile_cluster.go b/cmd/kops/reconcile_cluster.go new file mode 100644 index 0000000000000..02bb153281815 --- /dev/null +++ b/cmd/kops/reconcile_cluster.go @@ -0,0 +1,86 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package main
+
+import (
+	"context"
+	"fmt"
+	"io"
+
+	"k8s.io/kops/cmd/kops/util"
+	"k8s.io/kops/pkg/apis/kops"
+)
+
+// ReconcileCluster updates the cluster to the desired state, including rolling updates where necessary, writing progress messages to out.
+// To respect skew policy, it updates and rolls the control plane (API server and control-plane roles) first, then updates and rolls all roles.
+// "update" is probably now smart enough to automatically not update the nodes if that would violate the skew policy,
+// but we do it explicitly here to be clearer / safer. The first failing step aborts the sequence and its error is returned, wrapped with the step name.
+func ReconcileCluster(ctx context.Context, f *util.Factory, out io.Writer, c *UpdateClusterOptions) error {
+	fmt.Fprintf(out, "Updating control plane configuration\n")
+	{
+		opt := *c
+		opt.Reconcile = false // Prevent infinite loop: RunUpdateCluster calls back into ReconcileCluster while this is set
+		opt.InstanceGroupRoles = []string{
+			string(kops.InstanceGroupRoleAPIServer),
+			string(kops.InstanceGroupRoleControlPlane),
+		}
+		if _, err := RunUpdateCluster(ctx, f, out, &opt); err != nil {
+			return fmt.Errorf("updating control plane configuration: %w", err)
+		}
+	}
+
+	fmt.Fprintf(out, "Doing rolling-update for control plane\n")
+	{
+		opt := &RollingUpdateOptions{}
+		opt.InitDefaults()
+		opt.ClusterName = c.ClusterName
+		opt.InstanceGroupRoles = []string{
+			string(kops.InstanceGroupRoleAPIServer),
+			string(kops.InstanceGroupRoleControlPlane),
+		}
+		opt.Yes = c.Yes
+		if err := RunRollingUpdateCluster(ctx, f, out, opt); err != nil {
+			return fmt.Errorf("rolling-update of control plane: %w", err)
+		}
+	}
+
+	fmt.Fprintf(out, "Updating node configuration\n")
+	{
+		opt := *c
+		opt.Reconcile = false // Prevent infinite loop: RunUpdateCluster calls back into ReconcileCluster while this is set
+		// Do all roles this time (no role filter), though we only expect changes to node & bastion roles
+		opt.InstanceGroupRoles = nil
+		if _, err := RunUpdateCluster(ctx, f, out, &opt); err != nil {
+			return fmt.Errorf("updating node configuration: %w", err)
+		}
+	}
+
+	fmt.Fprintf(out, "Doing rolling-update for nodes\n")
+	{
+		opt := &RollingUpdateOptions{}
+		opt.InitDefaults()
+		opt.ClusterName = c.ClusterName
+		// Do all roles this time (no role filter), though we only expect changes to node & bastion roles
+		opt.InstanceGroupRoles = nil
+		opt.Yes = c.Yes
+		if err := RunRollingUpdateCluster(ctx, f, out, opt); err != nil {
+			return fmt.Errorf("rolling-update of nodes: %w", err)
+		}
+	}
+
+	return nil
+}
diff --git a/cmd/kops/update_cluster.go b/cmd/kops/update_cluster.go
index 9b938f93920c7..204ff7f9e6a58 100644
--- a/cmd/kops/update_cluster.go
+++ b/cmd/kops/update_cluster.go
@@ -103,6 +103,9 @@ type UpdateClusterOptions struct {
 	// The goal is that the cluster can keep running even during more disruptive
 	// infrastructure changes.
 	Prune bool
+
+	// Reconcile is true if we should reconcile the cluster by rolling the control plane and nodes sequentially
+	Reconcile bool
 }
 
 func (o *UpdateClusterOptions) InitDefaults() {
@@ -117,6 +120,7 @@ func (o *UpdateClusterOptions) InitDefaults() {
 	o.CreateKubecfg = true
 
 	o.Prune = false
+	o.Reconcile = false
 
 	o.RunTasksOptions.InitDefaults()
 }
@@ -193,6 +197,16 @@ type UpdateClusterResults struct {
 }
 
 func RunUpdateCluster(ctx context.Context, f *util.Factory, out io.Writer, c *UpdateClusterOptions) (*UpdateClusterResults, error) {
+	if c.Reconcile {
+		if !c.Yes {
+			return nil, fmt.Errorf("--reconcile is only supported with --yes")
+		}
+		if c.Target == cloudup.TargetTerraform {
+			return nil, fmt.Errorf("--reconcile is not supported with terraform")
+		}
+		return nil, ReconcileCluster(ctx, f, out, c)
+	}
+
 	results := &UpdateClusterResults{}
 
 	isDryrun := false
diff --git a/docs/cli/kops_update_cluster.md b/docs/cli/kops_update_cluster.md
index 332fe909d050d..6d190cb52b4ce 100644
--- a/docs/cli/kops_update_cluster.md
+++ b/docs/cli/kops_update_cluster.md
@@ -37,6 +37,7 @@ kops update cluster [CLUSTER] [flags]
       --out string                    Path to write any local output
       --phase string                  Subset of tasks to run: cluster, network, security
       --prune                         Delete old revisions of cloud resources that were needed during an upgrade
+      --reconcile                     Reconcile the cluster by rolling the control plane and nodes sequentially
       --ssh-public-key string         SSH public key to use (deprecated: use kops create secret instead)
       --target string                 Target - direct, terraform (default "direct")
       --user string                   Existing user in kubeconfig
file to use. Implies --create-kube-config