From 1b0b6b569955af0d9b2e1bfdece5624862241667 Mon Sep 17 00:00:00 2001
From: Moritz Sanft <58110325+msanft@users.noreply.github.com>
Date: Wed, 26 Feb 2025 12:57:08 +0100
Subject: [PATCH] constellation-node-operator: upgrade control plane nodes
 first (#3663)

---
 .../controllers/nodeversion_controller.go      |  20 +++-
 .../controllers/nodeversion_controller_test.go | 102 +++++++++++++++++-
 2 files changed, 119 insertions(+), 3 deletions(-)

diff --git a/operators/constellation-node-operator/controllers/nodeversion_controller.go b/operators/constellation-node-operator/controllers/nodeversion_controller.go
index fc185b1e2f..c46c7b3131 100644
--- a/operators/constellation-node-operator/controllers/nodeversion_controller.go
+++ b/operators/constellation-node-operator/controllers/nodeversion_controller.go
@@ -214,7 +214,7 @@ func (r *NodeVersionReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		return ctrl.Result{Requeue: shouldRequeue}, nil
 	}
 
-	newNodeConfig := newNodeConfig{desiredNodeVersion, groups.Outdated, pendingNodeList.Items, scalingGroupByID, newNodesBudget}
+	newNodeConfig := newNodeConfig{desiredNodeVersion, groups.Outdated, groups.Donors, pendingNodeList.Items, scalingGroupByID, newNodesBudget}
 	if err := r.createNewNodes(ctx, newNodeConfig); err != nil {
 		logr.Error(err, "Creating new nodes")
 		return ctrl.Result{Requeue: shouldRequeue}, nil
@@ -614,6 +614,15 @@ func (r *NodeVersionReconciler) createNewNodes(ctx context.Context, config newNo
 	if config.newNodesBudget < 1 || len(config.outdatedNodes) == 0 {
 		return nil
 	}
+	// We need to look at both the outdated nodes *and* the nodes that have already
+	// been moved to the donors here because even if a CP node has already been moved to
+	// the donors, we still want to defer worker upgrades until the new CP node is actually joined.
+	hasOutdatedControlPlanes := false
+	for _, entry := range append(config.outdatedNodes, config.donors...) {
+		if nodeutil.IsControlPlaneNode(&entry) {
+			hasOutdatedControlPlanes = true
+		}
+	}
 	outdatedNodesPerScalingGroup := make(map[string]int)
 	for _, node := range config.outdatedNodes {
 		// skip outdated nodes that got assigned an heir in this Reconcile call
@@ -648,6 +657,12 @@ func (r *NodeVersionReconciler) createNewNodes(ctx context.Context, config newNo
 			continue
 		}
 		if requiredNodesPerScalingGroup[scalingGroupID] == 0 {
+			logr.Info("No new nodes needed for scaling group", "scalingGroup", scalingGroupID)
+			continue
+		}
+		// if we are a worker group and still have outdated control planes, we must wait for them to be upgraded.
+		if hasOutdatedControlPlanes && scalingGroup.Spec.Role != updatev1alpha1.ControlPlaneRole {
+			logr.Info("There are still outdated control plane nodes which must be replaced first before this worker scaling group is upgraded", "scalingGroup", scalingGroupID)
 			continue
 		}
 		for {
@@ -679,7 +694,7 @@ func (r *NodeVersionReconciler) createNewNodes(ctx context.Context, config newNo
 		if err := r.Create(ctx, pendingNode); err != nil {
 			return err
 		}
-		logr.Info("Created new node", "createdNode", nodeName, "scalingGroup", scalingGroupID)
+		logr.Info("Created new node", "createdNode", nodeName, "scalingGroup", scalingGroupID, "requiredNodes", requiredNodesPerScalingGroup[scalingGroupID])
 		requiredNodesPerScalingGroup[scalingGroupID]--
 		config.newNodesBudget--
 	}
@@ -939,6 +954,7 @@ type kubernetesServerVersionGetter interface {
 type newNodeConfig struct {
 	desiredNodeVersion updatev1alpha1.NodeVersion
 	outdatedNodes      []corev1.Node
+	donors             []corev1.Node
 	pendingNodes       []updatev1alpha1.PendingNode
 	scalingGroupByID   map[string]updatev1alpha1.ScalingGroup
 	newNodesBudget     int
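For context, nodeutil.IsControlPlaneNode is not touched by this patch. Judging from the label the tests below attach to control-plane nodes, it presumably reduces to a lookup of the well-known role label; a minimal sketch under that assumption (the real helper lives in the operator's nodeutil package, exact implementation not shown here):

	package nodeutil

	import corev1 "k8s.io/api/core/v1"

	// IsControlPlaneNode reports whether the node carries the well-known
	// control-plane role label, see
	// https://kubernetes.io/docs/reference/labels-annotations-taints/#node-role-kubernetes-io-control-plane
	func IsControlPlaneNode(node *corev1.Node) bool {
		_, ok := node.Labels["node-role.kubernetes.io/control-plane"]
		return ok
	}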
diff --git a/operators/constellation-node-operator/controllers/nodeversion_controller_test.go b/operators/constellation-node-operator/controllers/nodeversion_controller_test.go
index c9ae880421..6b85318517 100644
--- a/operators/constellation-node-operator/controllers/nodeversion_controller_test.go
+++ b/operators/constellation-node-operator/controllers/nodeversion_controller_test.go
@@ -330,6 +330,7 @@ func TestMatchDonorsAndHeirs(t *testing.T) {
 func TestCreateNewNodes(t *testing.T) {
 	testCases := map[string]struct {
 		outdatedNodes    []corev1.Node
+		donors           []corev1.Node
 		pendingNodes     []updatev1alpha1.PendingNode
 		scalingGroupByID map[string]updatev1alpha1.ScalingGroup
 		budget           int
@@ -573,6 +574,105 @@ func TestCreateNewNodes(t *testing.T) {
 			},
 			budget: 1,
 		},
+		"control plane node upgraded first": {
+			outdatedNodes: []corev1.Node{
+				// CP node
+				{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "control-plane-node",
+						Annotations: map[string]string{
+							scalingGroupAnnotation: "control-plane-scaling-group",
+						},
+						Labels: map[string]string{
+							// Mark this as a CP node as per
+							// https://kubernetes.io/docs/reference/labels-annotations-taints/#node-role-kubernetes-io-control-plane
+							"node-role.kubernetes.io/control-plane": "",
+						},
+					},
+				},
+				// Worker node
+				{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "node",
+						Annotations: map[string]string{
+							scalingGroupAnnotation: "scaling-group",
+						},
+					},
+				},
+			},
+			scalingGroupByID: map[string]updatev1alpha1.ScalingGroup{
+				"scaling-group": {
+					Spec: updatev1alpha1.ScalingGroupSpec{
+						GroupID: "scaling-group",
+						Role:    updatev1alpha1.WorkerRole,
+					},
+					Status: updatev1alpha1.ScalingGroupStatus{
+						ImageReference: "image",
+					},
+				},
+				"control-plane-scaling-group": {
+					Spec: updatev1alpha1.ScalingGroupSpec{
+						GroupID: "control-plane-scaling-group",
+						Role:    updatev1alpha1.ControlPlaneRole,
+					},
+					Status: updatev1alpha1.ScalingGroupStatus{
+						ImageReference: "image",
+					},
+				},
+			},
+			budget:          2,
+			wantCreateCalls: []string{"control-plane-scaling-group"},
+		},
+		"worker not upgraded while cp is in donors": {
+			donors: []corev1.Node{
+				// CP node
+				{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "control-plane-node",
+						Annotations: map[string]string{
+							scalingGroupAnnotation: "control-plane-scaling-group",
+						},
+						Labels: map[string]string{
+							// Mark this as a CP node as per
+							// https://kubernetes.io/docs/reference/labels-annotations-taints/#node-role-kubernetes-io-control-plane
+							"node-role.kubernetes.io/control-plane": "",
+						},
+					},
+				},
+			},
+			outdatedNodes: []corev1.Node{
+				// Worker node
+				{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "node",
+						Annotations: map[string]string{
+							scalingGroupAnnotation: "scaling-group",
+						},
+					},
+				},
+			},
+			scalingGroupByID: map[string]updatev1alpha1.ScalingGroup{
+				"scaling-group": {
+					Spec: updatev1alpha1.ScalingGroupSpec{
+						GroupID: "scaling-group",
+						Role:    updatev1alpha1.WorkerRole,
+					},
+					Status: updatev1alpha1.ScalingGroupStatus{
+						ImageReference: "image",
+					},
+				},
+				"control-plane-scaling-group": {
+					Spec: updatev1alpha1.ScalingGroupSpec{
+						GroupID: "control-plane-scaling-group",
+						Role:    updatev1alpha1.ControlPlaneRole,
+					},
+					Status: updatev1alpha1.ScalingGroupStatus{
+						ImageReference: "image",
+					},
+				},
+			},
+			budget: 1,
+		},
 	}
 
 	for name, tc := range testCases {
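The two table entries above pin down the ordering rule: with a budget of 2, only the control-plane scaling group receives a create call, and while a control-plane node is still among the donors, a worker group receives none at all. Distilled into a standalone predicate (a hypothetical helper, not part of the patch; assumes the updatev1alpha1 import used throughout the controller):

	// deferWorkerUpgrade reports whether a scaling group must wait for the
	// control plane: while any control-plane node is still outdated, or is a
	// donor whose heir has not joined yet, only control-plane groups may
	// receive new nodes.
	func deferWorkerUpgrade(sg updatev1alpha1.ScalingGroup, hasOutdatedControlPlanes bool) bool {
		return hasOutdatedControlPlanes && sg.Spec.Role != updatev1alpha1.ControlPlaneRole
	}

The deferred worker group is picked up again on a later Reconcile, once the replacement control-plane node has actually joined.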
@@ -592,7 +692,7 @@ func TestCreateNewNodes(t *testing.T) {
 			},
 			Scheme: getScheme(t),
 		}
-		newNodeConfig := newNodeConfig{desiredNodeImage, tc.outdatedNodes, tc.pendingNodes, tc.scalingGroupByID, tc.budget}
+		newNodeConfig := newNodeConfig{desiredNodeImage, tc.outdatedNodes, tc.donors, tc.pendingNodes, tc.scalingGroupByID, tc.budget}
 		err := reconciler.createNewNodes(context.Background(), newNodeConfig)
 		require.NoError(err)
 		assert.Equal(tc.wantCreateCalls, reconciler.nodeReplacer.(*stubNodeReplacerWriter).createCalls)
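Note that both call sites construct newNodeConfig positionally, so the new donors field must sit in exactly the position declared in the struct. Written with keyed fields, the Reconcile call site from the first hunk is equivalent to:

	newNodeConfig := newNodeConfig{
		desiredNodeVersion: desiredNodeVersion,
		outdatedNodes:      groups.Outdated,
		donors:             groups.Donors,
		pendingNodes:       pendingNodeList.Items,
		scalingGroupByID:   scalingGroupByID,
		newNodesBudget:     newNodesBudget,
	}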