diff --git a/operators/constellation-node-operator/controllers/nodeversion_controller.go b/operators/constellation-node-operator/controllers/nodeversion_controller.go index cfa3e315e2..a9508e5711 100644 --- a/operators/constellation-node-operator/controllers/nodeversion_controller.go +++ b/operators/constellation-node-operator/controllers/nodeversion_controller.go @@ -136,9 +136,9 @@ func (r *NodeVersionReconciler) Reconcile(ctx context.Context, req ctrl.Request) logr.Error(err, "Unable to list scaling groups") return ctrl.Result{}, err } - scalingGroupByID := make(map[string]updatev1alpha1.ScalingGroup, len(scalingGroupList.Items)) + scalingGroupByID := make(scalingGroupByID, len(scalingGroupList.Items)) for _, scalingGroup := range scalingGroupList.Items { - scalingGroupByID[strings.ToLower(scalingGroup.Spec.GroupID)] = scalingGroup + scalingGroupByID.put(scalingGroup) } annotatedNodes, invalidNodes := r.annotateNodes(ctx, nodeList.Items) groups := groupNodes(annotatedNodes, pendingNodeList.Items, desiredNodeVersion.Spec.ImageReference, desiredNodeVersion.Spec.KubernetesComponentsReference) @@ -217,7 +217,7 @@ func (r *NodeVersionReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{Requeue: shouldRequeue}, nil } - newNodeConfig := newNodeConfig{desiredNodeVersion, groups.Outdated, pendingNodeList.Items, scalingGroupByID, newNodesBudget} + newNodeConfig := newNodeConfig{desiredNodeVersion, groups, pendingNodeList.Items, scalingGroupByID, newNodesBudget} if err := r.createNewNodes(ctx, newNodeConfig); err != nil { logr.Error(err, "Creating new nodes") return ctrl.Result{Requeue: shouldRequeue}, nil @@ -613,12 +613,21 @@ func (r *NodeVersionReconciler) deleteNode(ctx context.Context, controller metav // createNewNodes creates new nodes using up to date images as replacement for outdated nodes. func (r *NodeVersionReconciler) createNewNodes(ctx context.Context, config newNodeConfig) error { + hasOutdatedControlPlanes := func() bool { + for _, entry := range append(config.groups.Outdated, config.groups.Donors...) { + if nodeutil.IsControlPlaneNode(&entry) { + return true + } + } + return false + }() + logr := log.FromContext(ctx) - if config.newNodesBudget < 1 || len(config.outdatedNodes) == 0 { + if config.newNodesBudget < 1 || len(config.groups.Outdated) == 0 { return nil } outdatedNodesPerScalingGroup := make(map[string]int) - for _, node := range config.outdatedNodes { + for _, node := range config.groups.Outdated { // skip outdated nodes that got assigned an heir in this Reconcile call if len(node.Annotations[heirAnnotation]) != 0 { continue @@ -640,17 +649,22 @@ func (r *NodeVersionReconciler) createNewNodes(ctx context.Context, config newNo requiredNodesPerScalingGroup[scalingGroupID] = outdatedNodesPerScalingGroup[scalingGroupID] - pendingJoiningNodesPerScalingGroup[scalingGroupID] } } - for scalingGroupID := range requiredNodesPerScalingGroup { - scalingGroup, ok := config.scalingGroupByID[scalingGroupID] + for _, scalingGroupID := range config.scalingGroupByID.getScalingGroupIDsSorted() { + scalingGroup, ok := config.scalingGroupByID.get(scalingGroupID) if !ok { logr.Info("Scaling group does not have matching resource", "scalingGroup", scalingGroupID, "scalingGroups", config.scalingGroupByID) continue } + if requiredNodesPerScalingGroup[scalingGroupID] == 0 { + logr.Info("No new nodes needed for scaling group", "scalingGroup", scalingGroupID) + continue + } if !strings.EqualFold(scalingGroup.Status.ImageReference, config.desiredNodeVersion.Spec.ImageReference) { logr.Info("Scaling group does not use latest image", "scalingGroup", scalingGroupID, "usedImage", scalingGroup.Status.ImageReference, "wantedImage", config.desiredNodeVersion.Spec.ImageReference) continue } - if requiredNodesPerScalingGroup[scalingGroupID] == 0 { + if hasOutdatedControlPlanes && scalingGroup.Spec.Role != updatev1alpha1.ControlPlaneRole { + logr.Info("There are still outdated control plane nodes which must be replaced first before this worker scaling group is upgraded", "scalingGroup", scalingGroupID) continue } for { @@ -679,10 +693,16 @@ func (r *NodeVersionReconciler) createNewNodes(ctx context.Context, config newNo if err := ctrl.SetControllerReference(&config.desiredNodeVersion, pendingNode, r.Scheme); err != nil { return err } + // Note that while we call Create here, it is not certain that the next reconciler iteration + // will see the pending node. This is because of delays in the kubeAPI. + // Currently, we don't explicitly wait until we can Get the resource. + // We can never be certain that other calls to Get will also see the resource, + // therefore it's just a tradeoff of the probability that we create more nodes than + // the specified budget. if err := r.Create(ctx, pendingNode); err != nil { return err } - logr.Info("Created new node", "createdNode", nodeName, "scalingGroup", scalingGroupID) + logr.Info("Created new node", "createdNode", nodeName, "scalingGroup", scalingGroupID, "requiredNodes", requiredNodesPerScalingGroup[scalingGroupID]) requiredNodesPerScalingGroup[scalingGroupID]-- config.newNodesBudget-- } @@ -939,10 +959,35 @@ type kubernetesServerVersionGetter interface { ServerVersion() (*version.Info, error) } +type scalingGroupByID map[string]updatev1alpha1.ScalingGroup + +// getScalingGroupIDsSorted returns the group IDs of all scaling groups with +// scaling groups with the role "control-plane" first. +func (s scalingGroupByID) getScalingGroupIDsSorted() []string { + var controlPlaneGroupIDs, otherGroupIDs []string + for id := range s { + if s[id].Spec.Role == updatev1alpha1.ControlPlaneRole { + controlPlaneGroupIDs = append(controlPlaneGroupIDs, id) + } else { + otherGroupIDs = append(otherGroupIDs, id) + } + } + return append(controlPlaneGroupIDs, otherGroupIDs...) +} + +func (s scalingGroupByID) put(scalingGroup updatev1alpha1.ScalingGroup) { + s[strings.ToLower(scalingGroup.Spec.GroupID)] = scalingGroup +} + +func (s scalingGroupByID) get(scalingGroupID string) (scalingGroup updatev1alpha1.ScalingGroup, ok bool) { + scalingGroup, ok = s[strings.ToLower(scalingGroupID)] + return +} + type newNodeConfig struct { desiredNodeVersion updatev1alpha1.NodeVersion - outdatedNodes []corev1.Node + groups nodeGroups pendingNodes []updatev1alpha1.PendingNode - scalingGroupByID map[string]updatev1alpha1.ScalingGroup + scalingGroupByID scalingGroupByID newNodesBudget uint32 } diff --git a/operators/constellation-node-operator/controllers/nodeversion_controller_test.go b/operators/constellation-node-operator/controllers/nodeversion_controller_test.go index 383e8593b5..b30bbc4191 100644 --- a/operators/constellation-node-operator/controllers/nodeversion_controller_test.go +++ b/operators/constellation-node-operator/controllers/nodeversion_controller_test.go @@ -560,6 +560,51 @@ func TestCreateNewNodes(t *testing.T) { budget: 2, wantCreateCalls: []string{"scaling-group"}, }, + "outdated nodes still contain control plane nodes": { + outdatedNodes: []corev1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "control-plane-node", + Annotations: map[string]string{ + scalingGroupAnnotation: "control-plane-scaling-group", + }, + Labels: map[string]string{ + constants.ControlPlaneRoleLabel: "", + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node", + Annotations: map[string]string{ + scalingGroupAnnotation: "scaling-group", + }, + }, + }, + }, + scalingGroupByID: map[string]updatev1alpha1.ScalingGroup{ + "scaling-group": { + Spec: updatev1alpha1.ScalingGroupSpec{ + GroupID: "scaling-group", + Role: updatev1alpha1.WorkerRole, + }, + Status: updatev1alpha1.ScalingGroupStatus{ + ImageReference: "image", + }, + }, + "control-plane-scaling-group": { + Spec: updatev1alpha1.ScalingGroupSpec{ + GroupID: "control-plane-scaling-group", + Role: updatev1alpha1.ControlPlaneRole, + }, + Status: updatev1alpha1.ScalingGroupStatus{ + ImageReference: "image", + }, + }, + }, + budget: 2, + wantCreateCalls: []string{"control-plane-scaling-group"}, + }, "scaling group does not exist": { outdatedNodes: []corev1.Node{ { @@ -592,7 +637,10 @@ func TestCreateNewNodes(t *testing.T) { }, Scheme: getScheme(t), } - newNodeConfig := newNodeConfig{desiredNodeImage, tc.outdatedNodes, tc.pendingNodes, tc.scalingGroupByID, tc.budget} + groups := nodeGroups{ + Outdated: tc.outdatedNodes, + } + newNodeConfig := newNodeConfig{desiredNodeImage, groups, tc.pendingNodes, tc.scalingGroupByID, tc.budget} err := reconciler.createNewNodes(context.Background(), newNodeConfig) require.NoError(err) assert.Equal(tc.wantCreateCalls, reconciler.nodeReplacer.(*stubNodeReplacerWriter).createCalls)