Skip to content

Commit

Permalink
Introduce --reconcile flag to kOps
Browse files Browse the repository at this point in the history
Kubernetes 1.31 now stops nodes joining a cluster if the minor version
of the node is greater than the minor version of the control plane.

The addition of the instance-group-roles flag to update means that we
can now update / rolling-update the control plane first.  However, we
must now issue four commands:

* Update control plane
* Rolling update control plane
* Update nodes
* Rolling update nodes

This adds a flag to automate this process.  It is implemented by
executing those 4 steps in sequence.

Update is also smart enough to not update the nodes if this would
violate the skew policy, but we do this explicitly in the reconcile
command to be clearer and safer.
  • Loading branch information
justinsb committed Dec 5, 2024
1 parent 3a8a13f commit dd36bc0
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 0 deletions.
86 changes: 86 additions & 0 deletions cmd/kops/reconcile_cluster.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"context"
"fmt"
"io"

"k8s.io/kops/cmd/kops/util"
"k8s.io/kops/pkg/apis/kops"
)

// ReconcileCluster updates the cluster to the desired state, including rolling updates where necessary.
//
// To respect the Kubernetes version skew policy, it runs four steps in order and stops at the first failure:
//  1. update the control plane (APIServer and ControlPlane instance group roles)
//  2. rolling-update the control plane
//  3. update all instance groups
//  4. rolling-update all instance groups
//
// "update" is probably now smart enough to hold back node changes that would violate the skew policy,
// but we sequence the steps explicitly here to be clearer / safer.
//
// Each update step runs on a copy of c with Reconcile cleared and InstanceGroupRoles overridden.
// Each rolling-update step starts from default RollingUpdateOptions and takes only ClusterName and Yes from c.
// Progress messages are written to out. A returned error wraps the underlying error and names the failed step.
func ReconcileCluster(ctx context.Context, f *util.Factory, out io.Writer, c *UpdateClusterOptions) error {
	// controlPlaneRoles returns a fresh slice on every call so that the option
	// structs handed to the different steps never share a backing array.
	controlPlaneRoles := func() []string {
		return []string{
			string(kops.InstanceGroupRoleAPIServer),
			string(kops.InstanceGroupRoleControlPlane),
		}
	}

	// update runs a plain (non-reconcile) "update cluster" restricted to roles; nil means all roles.
	update := func(roles []string) error {
		opt := *c
		// RunUpdateCluster dispatches back to ReconcileCluster when Reconcile is set,
		// so it must be cleared to prevent an infinite loop.
		opt.Reconcile = false
		opt.InstanceGroupRoles = roles
		_, err := RunUpdateCluster(ctx, f, out, &opt)
		return err
	}

	// rollingUpdate runs "rolling-update cluster" restricted to roles; nil means all roles.
	rollingUpdate := func(roles []string) error {
		opt := &RollingUpdateOptions{}
		opt.InitDefaults()
		opt.ClusterName = c.ClusterName
		opt.InstanceGroupRoles = roles
		opt.Yes = c.Yes
		return RunRollingUpdateCluster(ctx, f, out, opt)
	}

	fmt.Fprintln(out, "Updating control plane configuration")
	if err := update(controlPlaneRoles()); err != nil {
		return fmt.Errorf("updating control plane configuration: %w", err)
	}

	fmt.Fprintln(out, "Doing rolling-update for control plane")
	if err := rollingUpdate(controlPlaneRoles()); err != nil {
		return fmt.Errorf("rolling-update of control plane: %w", err)
	}

	// Do all roles from here on, though we only expect changes to node & bastion roles.
	fmt.Fprintln(out, "Updating node configuration")
	if err := update(nil); err != nil {
		return fmt.Errorf("updating node configuration: %w", err)
	}

	fmt.Fprintln(out, "Doing rolling-update for nodes")
	if err := rollingUpdate(nil); err != nil {
		return fmt.Errorf("rolling-update of nodes: %w", err)
	}

	return nil
}
14 changes: 14 additions & 0 deletions cmd/kops/update_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ type UpdateClusterOptions struct {
// The goal is that the cluster can keep running even during more disruptive
// infrastructure changes.
Prune bool

// Reconcile is true if we should reconcile the cluster by rolling the control plane and nodes sequentially
Reconcile bool
}

func (o *UpdateClusterOptions) InitDefaults() {
Expand All @@ -117,6 +120,7 @@ func (o *UpdateClusterOptions) InitDefaults() {
o.CreateKubecfg = true

o.Prune = false
o.Reconcile = false

o.RunTasksOptions.InitDefaults()
}
Expand Down Expand Up @@ -193,6 +197,16 @@ type UpdateClusterResults struct {
}

func RunUpdateCluster(ctx context.Context, f *util.Factory, out io.Writer, c *UpdateClusterOptions) (*UpdateClusterResults, error) {
if c.Reconcile {
if !c.Yes {
return nil, fmt.Errorf("--reconcile is only supported with --yes")
}
if c.Target == cloudup.TargetTerraform {
return nil, fmt.Errorf("--reconcile is not supported with terraform")
}
return nil, ReconcileCluster(ctx, f, out, c)
}

results := &UpdateClusterResults{}

isDryrun := false
Expand Down
1 change: 1 addition & 0 deletions docs/cli/kops_update_cluster.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit dd36bc0

Please sign in to comment.