diff --git a/apis/workloads/v1alpha1/instanceset_types.go b/apis/workloads/v1alpha1/instanceset_types.go index 80c37f02d3a..965b7d98aa8 100644 --- a/apis/workloads/v1alpha1/instanceset_types.go +++ b/apis/workloads/v1alpha1/instanceset_types.go @@ -274,27 +274,72 @@ type InstanceSetSpec struct { // InstanceSetStatus defines the observed state of InstanceSet type InstanceSetStatus struct { - appsv1.StatefulSetStatus `json:",inline"` + // observedGeneration is the most recent generation observed for this InstanceSet. It corresponds to the + // InstanceSet's generation, which is updated on mutation by the API Server. + // + // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + + // replicas is the number of instances created by the InstanceSet controller. + Replicas int32 `json:"replicas"` + + // readyReplicas is the number of instances created for this InstanceSet with a Ready Condition. + ReadyReplicas int32 `json:"readyReplicas,omitempty"` + + // currentReplicas is the number of instances created by the InstanceSet controller from the InstanceSet version + // indicated by CurrentRevisions. + CurrentReplicas int32 `json:"currentReplicas,omitempty"` + + // updatedReplicas is the number of instances created by the InstanceSet controller from the InstanceSet version + // indicated by UpdateRevisions. + UpdatedReplicas int32 `json:"updatedReplicas,omitempty"` + + // currentRevision, if not empty, indicates the version of the InstanceSet used to generate instances in the + // sequence [0,currentReplicas). + CurrentRevision string `json:"currentRevision,omitempty"` + + // updateRevision, if not empty, indicates the version of the InstanceSet used to generate instances in the sequence + // [replicas-updatedReplicas,replicas) + UpdateRevision string `json:"updateRevision,omitempty"` + + // Represents the latest available observations of an instanceset's current state. + // Known .status.conditions.type are: "InstanceFailure", "InstanceReady" + // + // +optional + // +patchMergeKey=type + // +patchStrategy=merge + // +listType=map + // +listMapKey=type + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` - // Defines the initial number of pods (members) when the cluster is first initialized. + // Total number of available instances (ready for at least minReadySeconds) targeted by this InstanceSet. + // + // +optional + AvailableReplicas int32 `json:"availableReplicas"` + + // Defines the initial number of instances when the cluster is first initialized. // This value is set to spec.Replicas at the time of object creation and remains constant thereafter. + // Used only when spec.roles set. + // + // +optional InitReplicas int32 `json:"initReplicas"` - // Represents the number of pods (members) that have already reached the MembersStatus during the cluster initialization stage. + // Represents the number of instances that have already reached the MembersStatus during the cluster initialization stage. // This value remains constant once it equals InitReplicas. + // Used only when spec.roles set. // // +optional ReadyInitReplicas int32 `json:"readyInitReplicas,omitempty"` - // When not empty, indicates the version of the InstanceSet used to generate the underlying workload. + // Provides the status of each member in the cluster. // // +optional - CurrentGeneration int64 `json:"currentGeneration,omitempty"` + MembersStatus []MemberStatus `json:"membersStatus,omitempty"` - // Provides the status of each member in the cluster. + // Indicates whether it is required for the InstanceSet to have at least one primary instance ready. // // +optional - MembersStatus []MemberStatus `json:"membersStatus,omitempty"` + ReadyWithoutPrimary bool `json:"readyWithoutPrimary,omitempty"` // currentRevisions, if not empty, indicates the old version of the InstanceSet used to generate the underlying workload. // key is the pod name, value is the revision. @@ -564,16 +609,30 @@ type MemberStatus struct { // // +optional ReplicaRole *ReplicaRole `json:"role,omitempty"` +} - // Whether the corresponding Pod is in ready condition. - // +optional - Ready bool `json:"ready,omitempty"` +type ConditionType string - // Indicates whether it is required for the InstanceSet to have at least one primary instance ready. - // - // +optional - ReadyWithoutPrimary bool `json:"readyWithoutPrimary"` -} +const ( + // InstanceReady is added in an instance set when at least one of its instances(pods) is in a Ready condition. + // ConditionStatus will be True if all its instances(pods) are in a Ready condition. + // Or, a NotReady reason with not ready instances encoded in the Message filed will be set. + InstanceReady ConditionType = "InstanceReady" + + // InstanceFailure is added in an instance set when at least one of its instances(pods) is in a `Failed` phase. + InstanceFailure ConditionType = "InstanceFailure" +) + +const ( + // ReasonNotReady is a reason for condition InstanceReady. + ReasonNotReady = "NotReady" + + // ReasonReady is a reason for condition InstanceReady. + ReasonReady = "Ready" + + // ReasonInstanceFailure is a reason for condition InstanceFailure. + ReasonInstanceFailure = "InstanceFailure" +) func (t *InstanceTemplate) GetName() string { return t.Name diff --git a/apis/workloads/v1alpha1/zz_generated.deepcopy.go b/apis/workloads/v1alpha1/zz_generated.deepcopy.go index 5548e896c8b..3e537460189 100644 --- a/apis/workloads/v1alpha1/zz_generated.deepcopy.go +++ b/apis/workloads/v1alpha1/zz_generated.deepcopy.go @@ -237,7 +237,13 @@ func (in *InstanceSetSpec) DeepCopy() *InstanceSetSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InstanceSetStatus) DeepCopyInto(out *InstanceSetStatus) { *out = *in - in.StatefulSetStatus.DeepCopyInto(&out.StatefulSetStatus) + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.MembersStatus != nil { in, out := &in.MembersStatus, &out.MembersStatus *out = make([]MemberStatus, len(*in)) diff --git a/config/crd/bases/apps.kubeblocks.io_clusters.yaml b/config/crd/bases/apps.kubeblocks.io_clusters.yaml index 434711e5d04..29b78555762 100644 --- a/config/crd/bases/apps.kubeblocks.io_clusters.yaml +++ b/config/crd/bases/apps.kubeblocks.io_clusters.yaml @@ -7498,14 +7498,6 @@ spec: default: Unknown description: Represents the name of the pod. type: string - ready: - description: Whether the corresponding Pod is in ready - condition. - type: boolean - readyWithoutPrimary: - description: Indicates whether it is required for the - InstanceSet to have at least one primary instance ready. - type: boolean role: description: Defines the role of the replica in the cluster. properties: diff --git a/config/crd/bases/workloads.kubeblocks.io_instancesets.yaml b/config/crd/bases/workloads.kubeblocks.io_instancesets.yaml index b936792ab7b..7952b194ab0 100644 --- a/config/crd/bases/workloads.kubeblocks.io_instancesets.yaml +++ b/config/crd/bases/workloads.kubeblocks.io_instancesets.yaml @@ -12163,61 +12163,93 @@ spec: This data may be out of date. properties: availableReplicas: - description: Total number of available pods (ready for at least minReadySeconds) - targeted by this statefulset. - format: int32 - type: integer - collisionCount: - description: collisionCount is the count of hash collisions for the - StatefulSet. The StatefulSet controller uses this field as a collision - avoidance mechanism when it needs to create the name for the newest - ControllerRevision. + description: Total number of available instances (ready for at least + minReadySeconds) targeted by this InstanceSet. format: int32 type: integer conditions: - description: Represents the latest available observations of a statefulset's - current state. + description: 'Represents the latest available observations of an instanceset''s + current state. Known .status.conditions.type are: "InstanceFailure", + "InstanceReady"' items: - description: StatefulSetCondition describes the state of a statefulset - at a certain point. + description: "Condition contains details for one aspect of the current + state of this API Resource. --- This struct is intended for direct + use as an array at the field path .status.conditions. For example, + \n type FooStatus struct{ // Represents the observations of a + foo's current state. // Known .status.conditions.type are: \"Available\", + \"Progressing\", and \"Degraded\" // +patchMergeKey=type // +patchStrategy=merge + // +listType=map // +listMapKey=type Conditions []metav1.Condition + `json:\"conditions,omitempty\" patchStrategy:\"merge\" patchMergeKey:\"type\" + protobuf:\"bytes,1,rep,name=conditions\"` \n // other fields }" properties: lastTransitionTime: - description: Last time the condition transitioned from one status - to another. + description: lastTransitionTime is the last time the condition + transitioned from one status to another. This should be when + the underlying condition changed. If that is not known, then + using the time when the API field changed is acceptable. format: date-time type: string message: - description: A human readable message indicating details about - the transition. + description: message is a human readable message indicating + details about the transition. This may be an empty string. + maxLength: 32768 type: string + observedGeneration: + description: observedGeneration represents the .metadata.generation + that the condition was set based upon. For instance, if .metadata.generation + is currently 12, but the .status.conditions[x].observedGeneration + is 9, the condition is out of date with respect to the current + state of the instance. + format: int64 + minimum: 0 + type: integer reason: - description: The reason for the condition's last transition. + description: reason contains a programmatic identifier indicating + the reason for the condition's last transition. Producers + of specific condition types may define expected values and + meanings for this field, and whether the values are considered + a guaranteed API. The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ type: string status: - description: Status of the condition, one of True, False, Unknown. + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown type: string type: - description: Type of statefulset condition. + description: type of condition in CamelCase or in foo.example.com/CamelCase. + --- Many .condition.type values are consistent across resources + like Available, but because arbitrary conditions can be useful + (see .node.status.conditions), the ability to deconflict is + important. The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string required: + - lastTransitionTime + - message + - reason - status - type type: object type: array - currentGeneration: - description: When not empty, indicates the version of the InstanceSet - used to generate the underlying workload. - format: int64 - type: integer + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map currentReplicas: - description: currentReplicas is the number of Pods created by the - StatefulSet controller from the StatefulSet version indicated by - currentRevision. + description: currentReplicas is the number of instances created by + the InstanceSet controller from the InstanceSet version indicated + by CurrentRevisions. format: int32 type: integer currentRevision: description: currentRevision, if not empty, indicates the version - of the StatefulSet used to generate Pods in the sequence [0,currentReplicas). + of the InstanceSet used to generate instances in the sequence [0,currentReplicas). type: string currentRevisions: additionalProperties: @@ -12227,9 +12259,10 @@ spec: is the pod name, value is the revision. type: object initReplicas: - description: Defines the initial number of pods (members) when the - cluster is first initialized. This value is set to spec.Replicas - at the time of object creation and remains constant thereafter. + description: Defines the initial number of instances when the cluster + is first initialized. This value is set to spec.Replicas at the + time of object creation and remains constant thereafter. Used only + when spec.roles set. format: int32 type: integer membersStatus: @@ -12240,13 +12273,6 @@ spec: default: Unknown description: Represents the name of the pod. type: string - ready: - description: Whether the corresponding Pod is in ready condition. - type: boolean - readyWithoutPrimary: - description: Indicates whether it is required for the InstanceSet - to have at least one primary instance ready. - type: boolean role: description: Defines the role of the replica in the cluster. properties: @@ -12281,29 +12307,34 @@ spec: type: array observedGeneration: description: observedGeneration is the most recent generation observed - for this StatefulSet. It corresponds to the StatefulSet's generation, + for this InstanceSet. It corresponds to the InstanceSet's generation, which is updated on mutation by the API Server. format: int64 type: integer readyInitReplicas: - description: Represents the number of pods (members) that have already + description: Represents the number of instances that have already reached the MembersStatus during the cluster initialization stage. - This value remains constant once it equals InitReplicas. + This value remains constant once it equals InitReplicas. Used only + when spec.roles set. format: int32 type: integer readyReplicas: - description: readyReplicas is the number of pods created for this - StatefulSet with a Ready Condition. + description: readyReplicas is the number of instances created for + this InstanceSet with a Ready Condition. format: int32 type: integer + readyWithoutPrimary: + description: Indicates whether it is required for the InstanceSet + to have at least one primary instance ready. + type: boolean replicas: - description: replicas is the number of Pods created by the StatefulSet + description: replicas is the number of instances created by the InstanceSet controller. format: int32 type: integer updateRevision: description: updateRevision, if not empty, indicates the version of - the StatefulSet used to generate Pods in the sequence [replicas-updatedReplicas,replicas) + the InstanceSet used to generate instances in the sequence [replicas-updatedReplicas,replicas) type: string updateRevisions: additionalProperties: @@ -12313,13 +12344,12 @@ spec: is the pod name, value is the revision. type: object updatedReplicas: - description: updatedReplicas is the number of Pods created by the - StatefulSet controller from the StatefulSet version indicated by - updateRevision. + description: updatedReplicas is the number of instances created by + the InstanceSet controller from the InstanceSet version indicated + by UpdateRevisions. format: int32 type: integer required: - - initReplicas - replicas type: object type: object diff --git a/controllers/apps/transformer_component_status.go b/controllers/apps/transformer_component_status.go index dfaeea67c69..d5bdc8c71b4 100644 --- a/controllers/apps/transformer_component_status.go +++ b/controllers/apps/transformer_component_status.go @@ -21,14 +21,15 @@ package apps import ( "fmt" + "strconv" "strings" "time" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/kubectl/pkg/util/podutils" "sigs.k8s.io/controller-runtime/pkg/client" appsv1alpha1 "github.com/apecloud/kubeblocks/apis/apps/v1alpha1" @@ -134,21 +135,12 @@ func (r *componentStatusHandler) reconcileComponentStatus() error { return (r.runningITS.Spec.Replicas == nil || *r.runningITS.Spec.Replicas == 0) && r.synthesizeComp.Replicas == 0 }() - // get the component's underlying pods - pods, err := component.ListPodOwnedByComponent(r.reqCtx.Ctx, r.cli, r.cluster.Namespace, - constant.GetComponentWellKnownLabels(r.cluster.Name, r.synthesizeComp.Name), inDataContext4C()) - if err != nil { - return err - } hasComponentPod := func() bool { - return len(pods) > 0 + return r.runningITS.Status.Replicas > 0 }() // check if the ITS is running - isITSRunning, err := r.isInstanceSetRunning() - if err != nil { - return err - } + isITSUpdatedNRunning := r.isInstanceSetRunning() // check if all configTemplates are synced isAllConfigSynced, err := r.isAllConfigSynced() @@ -157,10 +149,7 @@ func (r *componentStatusHandler) reconcileComponentStatus() error { } // check if the component has failed pod - hasFailedPod, messages, err := r.hasFailedPod(pods) - if err != nil { - return err - } + hasFailedPod, messages := r.hasFailedPod() // check if the component scale out failed isScaleOutFailed, err := r.isScaleOutFailed() @@ -180,10 +169,7 @@ func (r *componentStatusHandler) reconcileComponentStatus() error { }() // check if the component is available - isComponentAvailable, err := r.isComponentAvailable(pods) - if err != nil { - return err - } + isComponentAvailable := r.isComponentAvailable() // check if the component is in creating phase isInCreatingPhase := func() bool { @@ -193,7 +179,7 @@ func (r *componentStatusHandler) reconcileComponentStatus() error { r.reqCtx.Log.Info( fmt.Sprintf("component status conditions, isInstanceSetRunning: %v, isAllConfigSynced: %v, hasRunningVolumeExpansion: %v, hasFailure: %v, isInCreatingPhase: %v, isComponentAvailable: %v", - isITSRunning, isAllConfigSynced, hasRunningVolumeExpansion, hasFailure, isInCreatingPhase, isComponentAvailable)) + isITSUpdatedNRunning, isAllConfigSynced, hasRunningVolumeExpansion, hasFailure, isInCreatingPhase, isComponentAvailable)) r.podsReady = false switch { @@ -205,7 +191,7 @@ func (r *componentStatusHandler) reconcileComponentStatus() error { case isZeroReplica: r.setComponentStatusPhase(appsv1alpha1.StoppedClusterCompPhase, nil, "component is Stopped") r.podsReady = true - case isITSRunning && isAllConfigSynced && !hasRunningVolumeExpansion: + case isITSUpdatedNRunning && isAllConfigSynced && !hasRunningVolumeExpansion: r.setComponentStatusPhase(appsv1alpha1.RunningClusterCompPhase, nil, "component is Running") r.podsReady = true case !hasFailure && isInCreatingPhase: @@ -221,61 +207,49 @@ func (r *componentStatusHandler) reconcileComponentStatus() error { return nil } +func (r *componentStatusHandler) isWorkloadUpdated() bool { + if r.cluster == nil || r.runningITS == nil { + return false + } + // check whether component spec has been sent to the underlying workload + itsComponentGeneration := r.runningITS.GetAnnotations()[constant.KubeBlocksGenerationKey] + return itsComponentGeneration == strconv.FormatInt(r.cluster.Generation, 10) +} + // isComponentAvailable tells whether the component is basically available, ether working well or in a fragile state: // 1. at least one pod is available // 2. with latest revision // 3. and with leader role label set -func (r *componentStatusHandler) isComponentAvailable(pods []*corev1.Pod) (bool, error) { - if isLatestRevision, err := component.IsComponentPodsWithLatestRevision(r.reqCtx.Ctx, r.cli, r.cluster, r.runningITS); err != nil { - return false, err - } else if !isLatestRevision { - return false, nil +func (r *componentStatusHandler) isComponentAvailable() bool { + if !r.isWorkloadUpdated() { + return false } - - shouldCheckRole := len(r.synthesizeComp.Roles) > 0 - - hasLeaderRoleLabel := func(pod *corev1.Pod) bool { - roleName, ok := pod.Labels[constant.RoleLabelKey] - if !ok { - return false - } - for _, replicaRole := range r.runningITS.Spec.Roles { - if roleName == replicaRole.Name && replicaRole.IsLeader { - return true - } - } + if r.runningITS.Status.CurrentRevision != r.runningITS.Status.UpdateRevision { return false } - - hasPodAvailable := false - for _, pod := range pods { - if !podutils.IsPodAvailable(pod, r.runningITS.Spec.MinReadySeconds, metav1.Time{Time: time.Now()}) { - continue - } - if shouldCheckRole && hasLeaderRoleLabel(pod) { - return true, nil - } - if !hasPodAvailable { - hasPodAvailable = !shouldCheckRole + if r.runningITS.Status.AvailableReplicas <= 0 { + return false + } + if len(r.synthesizeComp.Roles) == 0 { + return true + } + for _, status := range r.runningITS.Status.MembersStatus { + if status.ReplicaRole.IsLeader { + return true } } - return hasPodAvailable, nil + return false } // isRunning checks if the component underlying workload is running. -func (r *componentStatusHandler) isInstanceSetRunning() (bool, error) { +func (r *componentStatusHandler) isInstanceSetRunning() bool { if r.runningITS == nil { - return false, nil + return false } - if isLatestRevision, err := component.IsComponentPodsWithLatestRevision(r.reqCtx.Ctx, r.cli, r.cluster, r.runningITS); err != nil { - return false, err - } else if !isLatestRevision { - r.reqCtx.Log.Info("underlying workload is not the latest revision") - return false, nil + if !r.isWorkloadUpdated() { + return false } - - // whether the ITS is ready - return instanceset.IsInstanceSetReady(r.runningITS), nil + return instanceset.IsInstanceSetReady(r.runningITS) } // isAllConfigSynced checks if all configTemplates are synced. @@ -397,46 +371,44 @@ func (r *componentStatusHandler) getRunningVolumes(reqCtx intctrlutil.RequestCtx return matchedPVCs, nil } -// hasFailedPod checks if the component has failed pod. -// TODO(xingran): remove the dependency of the component's workload type. -func (r *componentStatusHandler) hasFailedPod(pods []*corev1.Pod) (bool, appsv1alpha1.ComponentMessageMap, error) { - if isLatestRevision, err := component.IsComponentPodsWithLatestRevision(r.reqCtx.Ctx, r.cli, r.cluster, r.runningITS); err != nil { - return false, nil, err - } else if !isLatestRevision { - return false, nil, nil - } - - var messages appsv1alpha1.ComponentMessageMap - // check pod readiness - hasFailedPod, msg, _ := hasFailedAndTimedOutPod(pods) +// hasFailedPod checks if the instance set has failed pod. +func (r *componentStatusHandler) hasFailedPod() (bool, appsv1alpha1.ComponentMessageMap) { + messages := appsv1alpha1.ComponentMessageMap{} + // check InstanceFailure condition + hasFailedPod := meta.IsStatusConditionTrue(r.runningITS.Status.Conditions, string(workloads.InstanceFailure)) if hasFailedPod { - messages = msg - return true, messages, nil + failureCondition := meta.FindStatusCondition(r.runningITS.Status.Conditions, string(workloads.InstanceFailure)) + messages.SetObjectMessage(workloads.Kind, r.runningITS.Name, failureCondition.Message) + return true, messages } - // check role probe - if r.synthesizeComp.WorkloadType != appsv1alpha1.Consensus && r.synthesizeComp.WorkloadType != appsv1alpha1.Replication { - return false, messages, nil + + // check InstanceReady condition + condition := meta.FindStatusCondition(r.runningITS.Status.Conditions, string(workloads.InstanceReady)) + if condition == nil { + return false, nil } - hasProbeTimeout := false - for _, pod := range pods { - if _, ok := pod.Labels[constant.RoleLabelKey]; ok { - continue - } - for _, condition := range pod.Status.Conditions { - if condition.Type != corev1.PodReady || condition.Status != corev1.ConditionTrue { - continue - } - podsReadyTime := &condition.LastTransitionTime - if IsProbeTimeout(r.synthesizeComp.Probes, podsReadyTime) { - hasProbeTimeout = true - if messages == nil { - messages = appsv1alpha1.ComponentMessageMap{} - } - messages.SetObjectMessage(pod.Kind, pod.Name, "Role probe timeout, check whether the application is available") - } + if condition.Status == metav1.ConditionFalse { + if time.Now().After(condition.LastTransitionTime.Add(intctrlutil.PodScheduledFailedTimeout)) { + messages.SetObjectMessage(workloads.Kind, r.runningITS.Name, condition.Message) + return true, messages } + return false, nil } - return hasProbeTimeout, messages, nil + + // all instances are in Ready condition, check role probe + if len(r.runningITS.Spec.Roles) == 0 { + return false, nil + } + if len(r.runningITS.Status.MembersStatus) == int(r.runningITS.Status.Replicas) { + return false, nil + } + probeTimeoutDuration := time.Duration(appsv1alpha1.DefaultRoleProbeTimeoutAfterPodsReady) * time.Second + if time.Now().After(condition.LastTransitionTime.Add(probeTimeoutDuration)) { + messages.SetObjectMessage(workloads.Kind, r.runningITS.Name, "Role probe timeout, check whether the application is available") + return true, messages + } + + return false, nil } // setComponentStatusPhase sets the component phase and messages conditionally. @@ -478,32 +450,6 @@ func (r *componentStatusHandler) updateComponentStatus(phaseTransitionMsg string return nil } -// hasFailedAndTimedOutPod returns whether the pods of components are still failed after a PodFailedTimeout period. -func hasFailedAndTimedOutPod(pods []*corev1.Pod) (bool, appsv1alpha1.ComponentMessageMap, time.Duration) { - var ( - hasTimedOutPod bool - messages = appsv1alpha1.ComponentMessageMap{} - hasFailedPod bool - requeueAfter time.Duration - ) - for _, pod := range pods { - isFailed, isTimedOut, messageStr := intctrlutil.IsPodFailedAndTimedOut(pod) - if !isFailed { - continue - } - if isTimedOut { - hasTimedOutPod = true - messages.SetObjectMessage(pod.Kind, pod.Name, messageStr) - } else { - hasFailedPod = true - } - } - if hasFailedPod && !hasTimedOutPod { - requeueAfter = intctrlutil.PodContainerFailedTimeout - } - return hasTimedOutPod, messages, requeueAfter -} - // newComponentStatusHandler creates a new componentStatusHandler func newComponentStatusHandler(reqCtx intctrlutil.RequestCtx, cli client.Client, diff --git a/controllers/workloads/instanceset_controller.go b/controllers/workloads/instanceset_controller.go index 6797ed40ab3..85124ccfcfb 100644 --- a/controllers/workloads/instanceset_controller.go +++ b/controllers/workloads/instanceset_controller.go @@ -42,7 +42,7 @@ import ( viper "github.com/apecloud/kubeblocks/pkg/viperx" ) -// InstanceSetReconciler reconciles a InstanceSet object +// InstanceSetReconciler reconciles an InstanceSet object type InstanceSetReconciler struct { client.Client Scheme *runtime.Scheme diff --git a/deploy/helm/crds/apps.kubeblocks.io_clusters.yaml b/deploy/helm/crds/apps.kubeblocks.io_clusters.yaml index 434711e5d04..29b78555762 100644 --- a/deploy/helm/crds/apps.kubeblocks.io_clusters.yaml +++ b/deploy/helm/crds/apps.kubeblocks.io_clusters.yaml @@ -7498,14 +7498,6 @@ spec: default: Unknown description: Represents the name of the pod. type: string - ready: - description: Whether the corresponding Pod is in ready - condition. - type: boolean - readyWithoutPrimary: - description: Indicates whether it is required for the - InstanceSet to have at least one primary instance ready. - type: boolean role: description: Defines the role of the replica in the cluster. properties: diff --git a/deploy/helm/crds/workloads.kubeblocks.io_instancesets.yaml b/deploy/helm/crds/workloads.kubeblocks.io_instancesets.yaml index b936792ab7b..7952b194ab0 100644 --- a/deploy/helm/crds/workloads.kubeblocks.io_instancesets.yaml +++ b/deploy/helm/crds/workloads.kubeblocks.io_instancesets.yaml @@ -12163,61 +12163,93 @@ spec: This data may be out of date. properties: availableReplicas: - description: Total number of available pods (ready for at least minReadySeconds) - targeted by this statefulset. - format: int32 - type: integer - collisionCount: - description: collisionCount is the count of hash collisions for the - StatefulSet. The StatefulSet controller uses this field as a collision - avoidance mechanism when it needs to create the name for the newest - ControllerRevision. + description: Total number of available instances (ready for at least + minReadySeconds) targeted by this InstanceSet. format: int32 type: integer conditions: - description: Represents the latest available observations of a statefulset's - current state. + description: 'Represents the latest available observations of an instanceset''s + current state. Known .status.conditions.type are: "InstanceFailure", + "InstanceReady"' items: - description: StatefulSetCondition describes the state of a statefulset - at a certain point. + description: "Condition contains details for one aspect of the current + state of this API Resource. --- This struct is intended for direct + use as an array at the field path .status.conditions. For example, + \n type FooStatus struct{ // Represents the observations of a + foo's current state. // Known .status.conditions.type are: \"Available\", + \"Progressing\", and \"Degraded\" // +patchMergeKey=type // +patchStrategy=merge + // +listType=map // +listMapKey=type Conditions []metav1.Condition + `json:\"conditions,omitempty\" patchStrategy:\"merge\" patchMergeKey:\"type\" + protobuf:\"bytes,1,rep,name=conditions\"` \n // other fields }" properties: lastTransitionTime: - description: Last time the condition transitioned from one status - to another. + description: lastTransitionTime is the last time the condition + transitioned from one status to another. This should be when + the underlying condition changed. If that is not known, then + using the time when the API field changed is acceptable. format: date-time type: string message: - description: A human readable message indicating details about - the transition. + description: message is a human readable message indicating + details about the transition. This may be an empty string. + maxLength: 32768 type: string + observedGeneration: + description: observedGeneration represents the .metadata.generation + that the condition was set based upon. For instance, if .metadata.generation + is currently 12, but the .status.conditions[x].observedGeneration + is 9, the condition is out of date with respect to the current + state of the instance. + format: int64 + minimum: 0 + type: integer reason: - description: The reason for the condition's last transition. + description: reason contains a programmatic identifier indicating + the reason for the condition's last transition. Producers + of specific condition types may define expected values and + meanings for this field, and whether the values are considered + a guaranteed API. The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ type: string status: - description: Status of the condition, one of True, False, Unknown. + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown type: string type: - description: Type of statefulset condition. + description: type of condition in CamelCase or in foo.example.com/CamelCase. + --- Many .condition.type values are consistent across resources + like Available, but because arbitrary conditions can be useful + (see .node.status.conditions), the ability to deconflict is + important. The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string required: + - lastTransitionTime + - message + - reason - status - type type: object type: array - currentGeneration: - description: When not empty, indicates the version of the InstanceSet - used to generate the underlying workload. - format: int64 - type: integer + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map currentReplicas: - description: currentReplicas is the number of Pods created by the - StatefulSet controller from the StatefulSet version indicated by - currentRevision. + description: currentReplicas is the number of instances created by + the InstanceSet controller from the InstanceSet version indicated + by CurrentRevisions. format: int32 type: integer currentRevision: description: currentRevision, if not empty, indicates the version - of the StatefulSet used to generate Pods in the sequence [0,currentReplicas). + of the InstanceSet used to generate instances in the sequence [0,currentReplicas). type: string currentRevisions: additionalProperties: @@ -12227,9 +12259,10 @@ spec: is the pod name, value is the revision. type: object initReplicas: - description: Defines the initial number of pods (members) when the - cluster is first initialized. This value is set to spec.Replicas - at the time of object creation and remains constant thereafter. + description: Defines the initial number of instances when the cluster + is first initialized. This value is set to spec.Replicas at the + time of object creation and remains constant thereafter. Used only + when spec.roles set. format: int32 type: integer membersStatus: @@ -12240,13 +12273,6 @@ spec: default: Unknown description: Represents the name of the pod. type: string - ready: - description: Whether the corresponding Pod is in ready condition. - type: boolean - readyWithoutPrimary: - description: Indicates whether it is required for the InstanceSet - to have at least one primary instance ready. - type: boolean role: description: Defines the role of the replica in the cluster. properties: @@ -12281,29 +12307,34 @@ spec: type: array observedGeneration: description: observedGeneration is the most recent generation observed - for this StatefulSet. It corresponds to the StatefulSet's generation, + for this InstanceSet. It corresponds to the InstanceSet's generation, which is updated on mutation by the API Server. format: int64 type: integer readyInitReplicas: - description: Represents the number of pods (members) that have already + description: Represents the number of instances that have already reached the MembersStatus during the cluster initialization stage. - This value remains constant once it equals InitReplicas. + This value remains constant once it equals InitReplicas. Used only + when spec.roles set. format: int32 type: integer readyReplicas: - description: readyReplicas is the number of pods created for this - StatefulSet with a Ready Condition. + description: readyReplicas is the number of instances created for + this InstanceSet with a Ready Condition. format: int32 type: integer + readyWithoutPrimary: + description: Indicates whether it is required for the InstanceSet + to have at least one primary instance ready. + type: boolean replicas: - description: replicas is the number of Pods created by the StatefulSet + description: replicas is the number of instances created by the InstanceSet controller. format: int32 type: integer updateRevision: description: updateRevision, if not empty, indicates the version of - the StatefulSet used to generate Pods in the sequence [replicas-updatedReplicas,replicas) + the InstanceSet used to generate instances in the sequence [replicas-updatedReplicas,replicas) type: string updateRevisions: additionalProperties: @@ -12313,13 +12344,12 @@ spec: is the pod name, value is the revision. type: object updatedReplicas: - description: updatedReplicas is the number of Pods created by the - StatefulSet controller from the StatefulSet version indicated by - updateRevision. + description: updatedReplicas is the number of instances created by + the InstanceSet controller from the InstanceSet version indicated + by UpdateRevisions. format: int32 type: integer required: - - initReplicas - replicas type: object type: object diff --git a/docs/developer_docs/api-reference/cluster.md b/docs/developer_docs/api-reference/cluster.md index 9c0995baf13..49f054521fe 100644 --- a/docs/developer_docs/api-reference/cluster.md +++ b/docs/developer_docs/api-reference/cluster.md @@ -23589,6 +23589,27 @@ string +

ConditionType +(string alias)

+
+
+ + + + + + + + + + + + +
ValueDescription

"InstanceFailure"

InstanceFailure is added in an instance set when at least one of its instances(pods) is in a Failed phase.

+

"InstanceReady"

InstanceReady is added in an instance set when at least one of its instances(pods) is in a Ready condition. +ConditionStatus will be True if all its instances(pods) are in a Ready condition. +Or, a NotReady reason with not ready instances encoded in the Message filed will be set.

+

Credential

@@ -24010,54 +24031,140 @@ Credential -StatefulSetStatus
+observedGeneration
- -Kubernetes apps/v1.StatefulSetStatus +int64 + + + +(Optional) +

observedGeneration is the most recent generation observed for this InstanceSet. It corresponds to the +InstanceSet’s generation, which is updated on mutation by the API Server.

+ + + + +replicas
+ +int32 + + + +

replicas is the number of instances created by the InstanceSet controller.

+ + + + +readyReplicas
+ +int32 + + + +

readyReplicas is the number of instances created for this InstanceSet with a Ready Condition.

+ + + + +currentReplicas
+ +int32 + + + +

currentReplicas is the number of instances created by the InstanceSet controller from the InstanceSet version +indicated by CurrentRevisions.

+ + + + +updatedReplicas
+ +int32 + + + +

updatedReplicas is the number of instances created by the InstanceSet controller from the InstanceSet version +indicated by UpdateRevisions.

+ + + + +currentRevision
+ +string + + + +

currentRevision, if not empty, indicates the version of the InstanceSet used to generate instances in the +sequence [0,currentReplicas).

+ + + + +updateRevision
+ +string + + + +

updateRevision, if not empty, indicates the version of the InstanceSet used to generate instances in the sequence +[replicas-updatedReplicas,replicas)

+ + + + +conditions
+ +
+[]Kubernetes meta/v1.Condition -

-(Members of StatefulSetStatus are embedded into this type.) -

+(Optional) +

Represents the latest available observations of an instanceset’s current state. +Known .status.conditions.type are: “InstanceFailure”, “InstanceReady”

-initReplicas
+availableReplicas
int32 -

Defines the initial number of pods (members) when the cluster is first initialized. -This value is set to spec.Replicas at the time of object creation and remains constant thereafter.

+(Optional) +

Total number of available instances (ready for at least minReadySeconds) targeted by this InstanceSet.

-readyInitReplicas
+initReplicas
int32 (Optional) -

Represents the number of pods (members) that have already reached the MembersStatus during the cluster initialization stage. -This value remains constant once it equals InitReplicas.

+

Defines the initial number of instances when the cluster is first initialized. +This value is set to spec.Replicas at the time of object creation and remains constant thereafter. +Used only when spec.roles set.

-currentGeneration
+readyInitReplicas
-int64 +int32 (Optional) -

When not empty, indicates the version of the InstanceSet used to generate the underlying workload.

+

Represents the number of instances that have already reached the MembersStatus during the cluster initialization stage. +This value remains constant once it equals InitReplicas. +Used only when spec.roles set.

@@ -24076,6 +24183,18 @@ int64 +readyWithoutPrimary
+ +bool + + + +(Optional) +

Indicates whether it is required for the InstanceSet to have at least one primary instance ready.

+ + + + currentRevisions
map[string]string @@ -24350,30 +24469,6 @@ ReplicaRole

Defines the role of the replica in the cluster.

- - -ready
- -bool - - - -(Optional) -

Whether the corresponding Pod is in ready condition.

- - - - -readyWithoutPrimary
- -bool - - - -(Optional) -

Indicates whether it is required for the InstanceSet to have at least one primary instance ready.

- -

MemberUpdateStrategy diff --git a/pkg/controller/component/pod_utils.go b/pkg/controller/component/pod_utils.go index 114a518b7c9..f9d99523b30 100644 --- a/pkg/controller/component/pod_utils.go +++ b/pkg/controller/component/pod_utils.go @@ -21,17 +21,12 @@ package component import ( "context" - "strconv" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" "sigs.k8s.io/controller-runtime/pkg/client" appsv1alpha1 "github.com/apecloud/kubeblocks/apis/apps/v1alpha1" - workloads "github.com/apecloud/kubeblocks/apis/workloads/v1alpha1" "github.com/apecloud/kubeblocks/pkg/constant" - intctrlutil "github.com/apecloud/kubeblocks/pkg/controllerutil" "github.com/apecloud/kubeblocks/pkg/generics" ) @@ -59,48 +54,3 @@ func GetComponentPodListWithRole(ctx context.Context, cli client.Reader, cluster } return podList, nil } - -// IsComponentPodsWithLatestRevision checks whether the underlying pod spec matches the one declared in the Cluster/Component. -func IsComponentPodsWithLatestRevision(ctx context.Context, cli client.Reader, - cluster *appsv1alpha1.Cluster, its *workloads.InstanceSet) (bool, error) { - if cluster == nil || its == nil { - return false, nil - } - // check whether component spec has been sent to the underlying workload - itsComponentGeneration := its.GetAnnotations()[constant.KubeBlocksGenerationKey] - if cluster.Status.ObservedGeneration != cluster.Generation || - itsComponentGeneration != strconv.FormatInt(cluster.Generation, 10) { - return false, nil - } - // check whether its spec has been sent to the underlying workload - if its.Status.ObservedGeneration != its.Generation { - return false, nil - } - if its.Status.CurrentGeneration != its.Generation { - return false, nil - } - - // TODO: depends on the workload (InstanceSet) - // check whether the underlying workload(sts) has sent the latest template to pods - sts := &appsv1.StatefulSet{} - if err := cli.Get(ctx, client.ObjectKeyFromObject(its), sts); err != nil { - if apierrors.IsNotFound(err) { - return true, nil - } - return false, err - } - if sts.Status.ObservedGeneration != sts.Generation { - return false, nil - } - pods, err := ListPodOwnedByComponent(ctx, cli, its.Namespace, its.Spec.Selector.MatchLabels, inDataContext()) - if err != nil { - return false, err - } - for _, pod := range pods { - if intctrlutil.GetPodRevision(pod) != sts.Status.UpdateRevision { - return false, nil - } - } - - return true, nil -} diff --git a/pkg/controller/factory/builder.go b/pkg/controller/factory/builder.go index 539f18c6656..923abda7952 100644 --- a/pkg/controller/factory/builder.go +++ b/pkg/controller/factory/builder.go @@ -51,7 +51,7 @@ import ( viper "github.com/apecloud/kubeblocks/pkg/viperx" ) -// BuildInstanceSet builds a InstanceSet object from SynthesizedComponent. +// BuildInstanceSet builds an InstanceSet object from SynthesizedComponent. func BuildInstanceSet(synthesizedComp *component.SynthesizedComponent, componentDef *appsv1alpha1.ComponentDefinition) (*workloads.InstanceSet, error) { var ( clusterDefName = synthesizedComp.ClusterDefName diff --git a/pkg/controller/instanceset/reconciler_revision_update.go b/pkg/controller/instanceset/reconciler_revision_update.go index 22d3093450c..0ee0e83983b 100644 --- a/pkg/controller/instanceset/reconciler_revision_update.go +++ b/pkg/controller/instanceset/reconciler_revision_update.go @@ -20,6 +20,9 @@ along with this program. If not, see . package instanceset import ( + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + workloads "github.com/apecloud/kubeblocks/apis/workloads/v1alpha1" "github.com/apecloud/kubeblocks/pkg/controller/kubebuilderx" "github.com/apecloud/kubeblocks/pkg/controller/model" @@ -94,6 +97,11 @@ func (r *revisionUpdateReconciler) Reconcile(tree *kubebuilderx.ObjectTree) (*ku updateRevision = instanceRevisionList[len(instanceRevisionList)-1].revision } its.Status.UpdateRevision = updateRevision + updatedReplicas, err := calculateUpdatedReplicas(its, tree.List(&corev1.Pod{})) + if err != nil { + return nil, err + } + its.Status.UpdatedReplicas = updatedReplicas // The 'ObservedGeneration' field is used to indicate whether the revisions have been updated. // Computing these revisions in each reconciliation loop can be time-consuming, so we optimize it by // performing the computation only when the 'spec' is updated. @@ -102,4 +110,20 @@ func (r *revisionUpdateReconciler) Reconcile(tree *kubebuilderx.ObjectTree) (*ku return tree, nil } +func calculateUpdatedReplicas(its *workloads.InstanceSet, pods []client.Object) (int32, error) { + updatedReplicas := int32(0) + for i := range pods { + pod, _ := pods[i].(*corev1.Pod) + updated, err := IsPodUpdated(its, pod) + if err != nil { + return 0, nil + } + if updated { + updatedReplicas++ + } + + } + return updatedReplicas, nil +} + var _ kubebuilderx.Reconciler = &revisionUpdateReconciler{} diff --git a/pkg/controller/instanceset/reconciler_status.go b/pkg/controller/instanceset/reconciler_status.go index d1c02ec0db1..e63f4b97b65 100644 --- a/pkg/controller/instanceset/reconciler_status.go +++ b/pkg/controller/instanceset/reconciler_status.go @@ -20,9 +20,12 @@ along with this program. If not, see . package instanceset import ( - "golang.org/x/exp/slices" + "encoding/json" + corev1 "k8s.io/api/core/v1" - "k8s.io/kubectl/pkg/util/podutils" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" workloads "github.com/apecloud/kubeblocks/apis/workloads/v1alpha1" "github.com/apecloud/kubeblocks/pkg/constant" @@ -51,10 +54,10 @@ func (r *statusReconciler) Reconcile(tree *kubebuilderx.ObjectTree) (*kubebuilde its, _ := tree.GetRoot().(*workloads.InstanceSet) // 1. get all pods pods := tree.List(&corev1.Pod{}) - var podList []corev1.Pod + var podList []*corev1.Pod for _, object := range pods { pod, _ := object.(*corev1.Pod) - podList = append(podList, *pod) + podList = append(podList, pod) } // 2. calculate status summary updateRevisions, err := getUpdateRevisions(its.Status.UpdateRevisions) @@ -64,13 +67,15 @@ func (r *statusReconciler) Reconcile(tree *kubebuilderx.ObjectTree) (*kubebuilde replicas := int32(0) currentReplicas, updatedReplicas := int32(0), int32(0) readyReplicas, availableReplicas := int32(0), int32(0) - for i := range podList { - pod := &podList[i] + notReadyNames := sets.New[string]() + for _, pod := range podList { if isCreated(pod) { + notReadyNames.Insert(pod.Name) replicas++ } if isRunningAndReady(pod) { readyReplicas++ + notReadyNames.Delete(pod.Name) if isRunningAndAvailable(pod, its.Spec.MinReadySeconds) { availableReplicas++ } @@ -93,7 +98,6 @@ func (r *statusReconciler) Reconcile(tree *kubebuilderx.ObjectTree) (*kubebuilde its.Status.AvailableReplicas = availableReplicas its.Status.CurrentReplicas = currentReplicas its.Status.UpdatedReplicas = updatedReplicas - its.Status.CurrentGeneration = its.Generation // all pods have been updated totalReplicas := int32(1) if its.Spec.Replicas != nil { @@ -104,40 +108,111 @@ func (r *statusReconciler) Reconcile(tree *kubebuilderx.ObjectTree) (*kubebuilde its.Status.CurrentRevision = its.Status.UpdateRevision its.Status.CurrentReplicas = totalReplicas } + readyCondition, err := buildReadyCondition(its, readyReplicas >= replicas, notReadyNames) + if err != nil { + return nil, err + } + meta.SetStatusCondition(&its.Status.Conditions, *readyCondition) + + // 3. set InstanceFailure condition + failureCondition, err := buildFailureCondition(its, podList) + if err != nil { + return nil, err + } + if failureCondition != nil { + meta.SetStatusCondition(&its.Status.Conditions, *failureCondition) + } + + // 4. set members status + setMembersStatus(its, podList) - // 3. set members status - setMembersStatus(its, &podList) + // 5. set readyWithoutPrimary + // TODO(free6om): should put this field to the spec + setReadyWithPrimary(its, podList) return tree, nil } -func setMembersStatus(its *workloads.InstanceSet, pods *[]corev1.Pod) { +func buildReadyCondition(its *workloads.InstanceSet, ready bool, notReadyNames sets.Set[string]) (*metav1.Condition, error) { + condition := &metav1.Condition{ + Type: string(workloads.InstanceReady), + Status: metav1.ConditionTrue, + ObservedGeneration: its.Generation, + Reason: workloads.ReasonReady, + } + if !ready { + condition.Status = metav1.ConditionFalse + condition.Reason = workloads.ReasonNotReady + names := notReadyNames.UnsortedList() + baseSort(names, func(i int) (string, int) { + return ParseParentNameAndOrdinal(names[i]) + }, nil, true) + message, err := json.Marshal(names) + if err != nil { + return nil, err + } + condition.Message = string(message) + } + return condition, nil +} + +func buildFailureCondition(its *workloads.InstanceSet, pods []*corev1.Pod) (*metav1.Condition, error) { + var failureNames []string + for _, pod := range pods { + if pod.Status.Phase == corev1.PodFailed { + failureNames = append(failureNames, pod.Name) + } + } + if len(failureNames) == 0 { + return nil, nil + } + baseSort(failureNames, func(i int) (string, int) { + return ParseParentNameAndOrdinal(failureNames[i]) + }, nil, true) + message, err := json.Marshal(failureNames) + if err != nil { + return nil, err + } + return &metav1.Condition{ + Type: string(workloads.InstanceFailure), + Status: metav1.ConditionTrue, + ObservedGeneration: its.Generation, + Reason: workloads.ReasonInstanceFailure, + Message: string(message), + }, nil +} + +func setReadyWithPrimary(its *workloads.InstanceSet, pods []*corev1.Pod) { + readyWithoutPrimary := false + for _, pod := range pods { + if value, ok := pod.Labels[constant.ReadyWithoutPrimaryKey]; ok && value == "true" { + readyWithoutPrimary = true + break + } + } + its.Status.ReadyWithoutPrimary = readyWithoutPrimary +} + +func setMembersStatus(its *workloads.InstanceSet, pods []*corev1.Pod) { // no roles defined if its.Spec.Roles == nil { - setMembersStatusWithoutRole(its, pods) return } // compose new status newMembersStatus := make([]workloads.MemberStatus, 0) roleMap := composeRoleMap(*its) - for _, pod := range *pods { - if !intctrlutil.PodIsReadyWithLabel(pod) { + for _, pod := range pods { + if !intctrlutil.PodIsReadyWithLabel(*pod) { continue } - readyWithoutPrimary := false - roleName := GetRoleName(pod) + roleName := getRoleName(pod) role, ok := roleMap[roleName] if !ok { continue } - if value, ok := pod.Labels[constant.ReadyWithoutPrimaryKey]; ok && value == "true" { - readyWithoutPrimary = true - } memberStatus := workloads.MemberStatus{ - PodName: pod.Name, - ReplicaRole: &role, - Ready: true, - ReadyWithoutPrimary: readyWithoutPrimary, + PodName: pod.Name, + ReplicaRole: &role, } newMembersStatus = append(newMembersStatus, memberStatus) } @@ -148,21 +223,6 @@ func setMembersStatus(its *workloads.InstanceSet, pods *[]corev1.Pod) { its.Status.MembersStatus = newMembersStatus } -func setMembersStatusWithoutRole(its *workloads.InstanceSet, pods *[]corev1.Pod) { - var membersStatus []workloads.MemberStatus - for _, pod := range *pods { - memberStatus := workloads.MemberStatus{ - PodName: pod.Name, - Ready: podutils.IsPodReady(&pod), - } - membersStatus = append(membersStatus, memberStatus) - } - slices.SortStableFunc(membersStatus, func(a, b workloads.MemberStatus) bool { - return a.PodName < b.PodName - }) - its.Status.MembersStatus = membersStatus -} - func sortMembersStatus(membersStatus []workloads.MemberStatus, rolePriorityMap map[string]int) { getRolePriorityFunc := func(i int) int { role := membersStatus[i].ReplicaRole.Name diff --git a/pkg/controller/instanceset/reconciler_status_test.go b/pkg/controller/instanceset/reconciler_status_test.go index 55ffd7115c7..83d92be7a23 100644 --- a/pkg/controller/instanceset/reconciler_status_test.go +++ b/pkg/controller/instanceset/reconciler_status_test.go @@ -20,6 +20,7 @@ along with this program. If not, see . package instanceset import ( + "encoding/json" "time" . "github.com/onsi/ginkgo/v2" @@ -104,7 +105,6 @@ var _ = Describe("status reconciler test", func() { Expect(its.Status.UpdatedReplicas).Should(BeEquivalentTo(0)) Expect(its.Status.CurrentReplicas).Should(BeEquivalentTo(0)) Expect(its.Status.CurrentRevisions).Should(HaveLen(0)) - Expect(its.Status.CurrentGeneration).Should(BeEquivalentTo(its.Generation)) By("make all pods ready with old revision") condition := corev1.PodCondition{ @@ -112,7 +112,7 @@ var _ = Describe("status reconciler test", func() { Status: corev1.ConditionTrue, LastTransitionTime: metav1.NewTime(time.Now().Add(-1 * minReadySeconds * time.Second)), } - makePodAvailableWithOldRevision := func(pod *corev1.Pod, revision string) { + makePodAvailableWithRevision := func(pod *corev1.Pod, revision string) { pod.Labels[appsv1.ControllerRevisionHashLabelKey] = revision pod.Status.Phase = corev1.PodRunning pod.Status.Conditions = append(pod.Status.Conditions, condition) @@ -121,7 +121,7 @@ var _ = Describe("status reconciler test", func() { for _, object := range pods { pod, ok := object.(*corev1.Pod) Expect(ok).Should(BeTrue()) - makePodAvailableWithOldRevision(pod, "old-revision") + makePodAvailableWithRevision(pod, "old-revision") } _, err = reconciler.Reconcile(newTree) Expect(err).Should(BeNil()) @@ -131,7 +131,6 @@ var _ = Describe("status reconciler test", func() { Expect(its.Status.UpdatedReplicas).Should(BeEquivalentTo(0)) Expect(its.Status.CurrentReplicas).Should(BeEquivalentTo(replicas)) Expect(its.Status.CurrentRevisions).Should(HaveLen(0)) - Expect(its.Status.CurrentGeneration).Should(BeEquivalentTo(its.Generation)) By("make all pods available with latest revision") updateRevisions, err := getUpdateRevisions(its.Status.UpdateRevisions) @@ -139,7 +138,7 @@ var _ = Describe("status reconciler test", func() { for _, object := range pods { pod, ok := object.(*corev1.Pod) Expect(ok).Should(BeTrue()) - makePodAvailableWithOldRevision(pod, updateRevisions[pod.Name]) + makePodAvailableWithRevision(pod, updateRevisions[pod.Name]) } _, err = reconciler.Reconcile(newTree) Expect(err).Should(BeNil()) @@ -149,16 +148,41 @@ var _ = Describe("status reconciler test", func() { Expect(its.Status.UpdatedReplicas).Should(BeEquivalentTo(replicas)) Expect(its.Status.CurrentReplicas).Should(BeEquivalentTo(replicas)) Expect(its.Status.CurrentRevisions).Should(Equal(its.Status.UpdateRevisions)) - Expect(its.Status.CurrentGeneration).Should(BeEquivalentTo(its.Generation)) + + By("make all pods failed") + for _, object := range pods { + pod, ok := object.(*corev1.Pod) + Expect(ok).Should(BeTrue()) + pod.Status.Phase = corev1.PodFailed + } + _, err = reconciler.Reconcile(newTree) + Expect(err).Should(BeNil()) + Expect(its.Status.Replicas).Should(BeEquivalentTo(replicas)) + Expect(its.Status.ReadyReplicas).Should(BeEquivalentTo(0)) + Expect(its.Status.AvailableReplicas).Should(BeEquivalentTo(0)) + Expect(its.Status.UpdatedReplicas).Should(BeEquivalentTo(replicas)) + Expect(its.Status.CurrentReplicas).Should(BeEquivalentTo(replicas)) + Expect(its.Status.CurrentRevisions).Should(Equal(its.Status.UpdateRevisions)) + Expect(its.Status.Conditions).Should(HaveLen(2)) + failureNames := []string{"bar-0", "bar-1", "bar-2", "bar-3", "bar-foo-0", "bar-foo-1", "bar-hello-0"} + message, err := json.Marshal(failureNames) + Expect(err).Should(BeNil()) + Expect(its.Status.Conditions[0].Type).Should(BeEquivalentTo(workloads.InstanceReady)) + Expect(its.Status.Conditions[0].Status).Should(BeEquivalentTo(metav1.ConditionFalse)) + Expect(its.Status.Conditions[0].Reason).Should(BeEquivalentTo(workloads.ReasonNotReady)) + Expect(its.Status.Conditions[0].Message).Should(BeEquivalentTo(message)) + Expect(its.Status.Conditions[1].Type).Should(BeEquivalentTo(workloads.InstanceFailure)) + Expect(its.Status.Conditions[1].Reason).Should(BeEquivalentTo(workloads.ReasonInstanceFailure)) + Expect(its.Status.Conditions[1].Message).Should(BeEquivalentTo(message)) }) }) Context("setMembersStatus function", func() { It("should work well", func() { - pods := []corev1.Pod{ - *builder.NewPodBuilder(namespace, "pod-0").AddLabels(RoleLabelKey, "follower").GetObject(), - *builder.NewPodBuilder(namespace, "pod-1").AddLabels(RoleLabelKey, "leader").GetObject(), - *builder.NewPodBuilder(namespace, "pod-2").AddLabels(RoleLabelKey, "follower").GetObject(), + pods := []*corev1.Pod{ + builder.NewPodBuilder(namespace, "pod-0").AddLabels(RoleLabelKey, "follower").GetObject(), + builder.NewPodBuilder(namespace, "pod-1").AddLabels(RoleLabelKey, "leader").GetObject(), + builder.NewPodBuilder(namespace, "pod-2").AddLabels(RoleLabelKey, "follower").GetObject(), } readyCondition := corev1.PodCondition{ Type: corev1.PodReady, @@ -183,7 +207,7 @@ var _ = Describe("status reconciler test", func() { replicas := int32(3) its.Spec.Replicas = &replicas its.Status.MembersStatus = oldMembersStatus - setMembersStatus(its, &pods) + setMembersStatus(its, pods) Expect(its.Status.MembersStatus).Should(HaveLen(2)) Expect(its.Status.MembersStatus[0].PodName).Should(Equal("pod-1")) diff --git a/pkg/controller/instanceset/update_plan.go b/pkg/controller/instanceset/update_plan.go index fa0c02f8cff..9fe0d595298 100644 --- a/pkg/controller/instanceset/update_plan.go +++ b/pkg/controller/instanceset/update_plan.go @@ -137,7 +137,7 @@ func (p *realUpdatePlan) buildBestEffortParallelUpdatePlan(rolePriorityMap map[s index := 0 podList := p.pods for i, pod := range podList { - roleName := GetRoleName(pod) + roleName := getRoleName(&pod) if rolePriorityMap[roleName] <= learnerPriority { vertex := &model.ObjectVertex{Obj: &podList[i]} p.dag.AddConnect(preVertex, vertex) @@ -151,7 +151,7 @@ func (p *realUpdatePlan) buildBestEffortParallelUpdatePlan(rolePriorityMap map[s podList = podList[index:] followerCount := 0 for _, pod := range podList { - roleName := GetRoleName(pod) + roleName := getRoleName(&pod) if rolePriorityMap[roleName] < leaderPriority { followerCount++ } diff --git a/pkg/controller/instanceset/utils.go b/pkg/controller/instanceset/utils.go index 5b9158423c0..cef83cf416f 100644 --- a/pkg/controller/instanceset/utils.go +++ b/pkg/controller/instanceset/utils.go @@ -71,7 +71,7 @@ func ComposeRolePriorityMap(roles []workloads.ReplicaRole) map[string]int { // reverse it if reverse==true func SortPods(pods []corev1.Pod, rolePriorityMap map[string]int, reverse bool) { getRolePriorityFunc := func(i int) int { - role := GetRoleName(pods[i]) + role := getRoleName(&pods[i]) return rolePriorityMap[role] } getNameNOrdinalFunc := func(i int) (string, int) { @@ -80,8 +80,8 @@ func SortPods(pods []corev1.Pod, rolePriorityMap map[string]int, reverse bool) { baseSort(pods, getNameNOrdinalFunc, getRolePriorityFunc, reverse) } -// GetRoleName gets role name of pod 'pod' -func GetRoleName(pod corev1.Pod) string { +// getRoleName gets role name of pod 'pod' +func getRoleName(pod *corev1.Pod) string { return strings.ToLower(pod.Labels[constant.RoleLabelKey]) } @@ -97,7 +97,7 @@ func IsInstanceSetReady(its *workloads.InstanceSet) bool { return false } // check whether latest spec has been sent to the underlying workload - if its.Status.ObservedGeneration != its.Generation || its.Status.CurrentGeneration != its.Generation { + if its.Status.ObservedGeneration != its.Generation { return false } // check whether the underlying workload is ready @@ -122,11 +122,11 @@ func IsInstanceSetReady(its *workloads.InstanceSet) bool { if len(membersStatus) != int(*its.Spec.Replicas) { return false } + if its.Status.ReadyWithoutPrimary { + return true + } hasLeader := false for _, status := range membersStatus { - if status.ReadyWithoutPrimary { - return true - } if status.ReplicaRole != nil && status.ReplicaRole.IsLeader { hasLeader = true break diff --git a/pkg/controller/instanceset/utils_test.go b/pkg/controller/instanceset/utils_test.go index 5f4df512fef..7d0c237edf1 100644 --- a/pkg/controller/instanceset/utils_test.go +++ b/pkg/controller/instanceset/utils_test.go @@ -152,10 +152,10 @@ var _ = Describe("utils test", func() { }) }) - Context("GetRoleName function", func() { + Context("getRoleName function", func() { It("should work well", func() { pod := builder.NewPodBuilder(namespace, name).AddLabels(RoleLabelKey, "LEADER").GetObject() - role := GetRoleName(*pod) + role := getRoleName(pod) Expect(role).Should(Equal("leader")) }) }) diff --git a/pkg/testutil/k8s/instance_set_util.go b/pkg/testutil/k8s/instance_set_util.go index 50587953ef3..d5da501dfbc 100644 --- a/pkg/testutil/k8s/instance_set_util.go +++ b/pkg/testutil/k8s/instance_set_util.go @@ -25,7 +25,6 @@ import ( "strings" "github.com/onsi/gomega" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -67,15 +66,13 @@ func NewFakeInstanceSet(name string, replicas int) *workloads.InstanceSet { ServiceName: "governingsvc", }, Status: workloads.InstanceSetStatus{ - InitReplicas: itsReplicas, - StatefulSetStatus: appsv1.StatefulSetStatus{ - AvailableReplicas: itsReplicas, - ObservedGeneration: 0, - ReadyReplicas: itsReplicas, - UpdatedReplicas: itsReplicas, - CurrentRevision: Revision, - UpdateRevision: Revision, - }, + InitReplicas: itsReplicas, + AvailableReplicas: itsReplicas, + ObservedGeneration: 0, + ReadyReplicas: itsReplicas, + UpdatedReplicas: itsReplicas, + CurrentRevision: Revision, + UpdateRevision: Revision, }, } } @@ -93,7 +90,6 @@ func MockInstanceSetReady(its *workloads.InstanceSet, pods ...*corev1.Pod) { its.Status.ReadyInitReplicas = *its.Spec.Replicas its.Status.AvailableReplicas = *its.Spec.Replicas its.Status.ObservedGeneration = its.Generation - its.Status.CurrentGeneration = its.Generation its.Status.Replicas = *its.Spec.Replicas its.Status.ReadyReplicas = *its.Spec.Replicas its.Status.CurrentRevision = its.Status.UpdateRevision @@ -176,6 +172,5 @@ func InitInstanceSetStatus(testCtx testutil.TestContext, its *workloads.Instance its.Status.UpdateRevision = controllerRevision its.Status.CurrentRevision = controllerRevision its.Status.ObservedGeneration = its.Generation - its.Status.CurrentGeneration = its.Generation })).Should(gomega.Succeed()) }