From 6365ffa6d0380562a449fc54de21f791dfdf0d1b Mon Sep 17 00:00:00 2001 From: Rui Vieira Date: Mon, 16 Oct 2023 23:43:22 +0100 Subject: [PATCH 1/2] fix: Refactor status updates (#119) * Refactor status updates * Add status unit tests --- README.md | 25 +++ api/v1alpha1/trustyaiservice_types.go | 24 +++ controllers/conditions.go | 22 --- controllers/constants.go | 14 ++ controllers/inference_services.go | 19 +- controllers/statuses.go | 52 ++++++ controllers/suite_test.go | 206 +++++++++++++++++++--- controllers/trustyaiservice_controller.go | 115 ++---------- 8 files changed, 327 insertions(+), 150 deletions(-) delete mode 100644 controllers/conditions.go create mode 100644 controllers/statuses.go diff --git a/README.md b/README.md index 906748c5..edab78d2 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,31 @@ If these parameters are unspecified, the [default image and tag](config/base/par If you'd like to change the service image/tag after deploying the operator, simply change the parameters in the KFDef. Any TrustyAI service deployed subsequently will use the new image and tag. +### `TrustyAIService` Status Updates + +The `TrustyAIService` custom resource tracks the availability of `InferenceServices` and `PersistentVolumeClaims (PVCs)` +through its `status` field. Below are the status types and reasons that are available: + +#### `InferenceService` Status + +| Status Type | Status Reason | Description | +|-------------------------------|-----------------------------------|-----------------------------------| +| `InferenceServicesPresent` | `InferenceServicesNotFound` | InferenceServices were not found. | +| `InferenceServicesPresent` | `InferenceServicesFound` | InferenceServices were found. | + +#### `PersistentVolumeClaim` (PVCs) Status + +| Status Type | Status Reason | Description | +|------------------|-----------------|------------------------------------| +| `PVCAvailable` | `PVCNotFound` | `PersistentVolumeClaim` not found. | +| `PVCAvailable` | `PVCFound` | `PersistentVolumeClaim` found. | + + +#### Status Behavior + +- If a PVC is not available, the `Ready` status of `TrustyAIService` will be set to `False`. +- However, if `InferenceServices` are not found, the `Ready` status of `TrustyAIService` will not be affected, _i.e._, it is `Ready` by all other conditions, it will remain so. + ## Contributing Please see the [CONTRIBUTING.md](./CONTRIBUTING.md) file for more details on how to contribute to this project. diff --git a/api/v1alpha1/trustyaiservice_types.go b/api/v1alpha1/trustyaiservice_types.go index ebd8f26a..59f2bb7c 100644 --- a/api/v1alpha1/trustyaiservice_types.go +++ b/api/v1alpha1/trustyaiservice_types.go @@ -89,3 +89,27 @@ type TrustyAIServiceList struct { func init() { SchemeBuilder.Register(&TrustyAIService{}, &TrustyAIServiceList{}) } + +// SetStatus sets the status of the TrustyAIService +func (t *TrustyAIService) SetStatus(condType, reason, message string, status corev1.ConditionStatus) { + now := metav1.Now() + condition := Condition{ + Type: condType, + Status: status, + Reason: reason, + Message: message, + LastTransitionTime: now, + } + // Replace or append condition + found := false + for i, cond := range t.Status.Conditions { + if cond.Type == condType { + t.Status.Conditions[i] = condition + found = true + break + } + } + if !found { + t.Status.Conditions = append(t.Status.Conditions, condition) + } +} diff --git a/controllers/conditions.go b/controllers/conditions.go deleted file mode 100644 index 3e8d2ee7..00000000 --- a/controllers/conditions.go +++ /dev/null @@ -1,22 +0,0 @@ -package controllers - -import ( - trustyaiopendatahubiov1alpha1 "github.com/trustyai-explainability/trustyai-service-operator/api/v1alpha1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func (r *TrustyAIServiceReconciler) setCondition(instance *trustyaiopendatahubiov1alpha1.TrustyAIService, condition trustyaiopendatahubiov1alpha1.Condition) error { - condition.LastTransitionTime = metav1.Now() - - for i, c := range instance.Status.Conditions { - if c.Type == condition.Type { - if c.Status != condition.Status || c.Reason != condition.Reason || c.Message != condition.Message { - instance.Status.Conditions[i] = condition - } - return nil - } - } - - instance.Status.Conditions = append(instance.Status.Conditions, condition) - return nil -} diff --git a/controllers/constants.go b/controllers/constants.go index 0d1d6241..ff680244 100644 --- a/controllers/constants.go +++ b/controllers/constants.go @@ -14,3 +14,17 @@ const ( volumeMountName = "volume" defaultRequeueDelay = time.Minute ) + +// Status types +const ( + StatusTypeInferenceServicesPresent = "InferenceServicesPresent" + StatusTypePVCAvailable = "PVCAvailable" +) + +// Status reasons +const ( + StatusReasonInferenceServicesNotFound = "InferenceServicesNotFound" + StatusReasonInferenceServicesFound = "InferenceServicesFound" + StatusReasonPVCNotFound = "PVCNotFound" + StatusReasonPVCFound = "PVCFound" +) diff --git a/controllers/inference_services.go b/controllers/inference_services.go index 507c3015..02b3499c 100644 --- a/controllers/inference_services.go +++ b/controllers/inference_services.go @@ -4,6 +4,7 @@ import ( "context" "fmt" kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + trustyaiopendatahubiov1alpha1 "github.com/trustyai-explainability/trustyai-service-operator/api/v1alpha1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -127,7 +128,7 @@ func generateEnvVarValue(currentValue, newValue string, remove bool) string { return currentValue } -func (r *TrustyAIServiceReconciler) handleInferenceServices(ctx context.Context, namespace string, labelKey string, labelValue string, envVarName string, crName string, remove bool) (bool, error) { +func (r *TrustyAIServiceReconciler) handleInferenceServices(ctx context.Context, instance *trustyaiopendatahubiov1alpha1.TrustyAIService, namespace string, labelKey string, labelValue string, envVarName string, crName string, remove bool) (bool, error) { var inferenceServices kservev1beta1.InferenceServiceList if err := r.List(ctx, &inferenceServices, client.InNamespace(namespace)); err != nil { @@ -135,6 +136,15 @@ func (r *TrustyAIServiceReconciler) handleInferenceServices(ctx context.Context, return false, err } + if len(inferenceServices.Items) == 0 { + _, updateErr := r.updateStatus(ctx, instance, UpdateInferenceServiceNotPresent) + if updateErr != nil { + log.FromContext(ctx).Error(updateErr, "Could not update status for InferenceService not present") + return false, updateErr + } + return true, nil + } + for _, infService := range inferenceServices.Items { annotations := infService.GetAnnotations() // Check the annotation "serving.kserve.io/deploymentMode: ModelMesh" @@ -152,6 +162,13 @@ func (r *TrustyAIServiceReconciler) handleInferenceServices(ctx context.Context, } } } + + instance.SetStatus("InferenceServicesPresent", "InferenceServicesFound", "InferenceServices found", corev1.ConditionTrue) + _, updateErr := r.updateStatus(ctx, instance, UpdateInferenceServicePresent) + if updateErr != nil { + log.FromContext(ctx).Error(updateErr, "Could not update status for InferenceService present") + return false, updateErr + } return true, nil } diff --git a/controllers/statuses.go b/controllers/statuses.go new file mode 100644 index 00000000..10954237 --- /dev/null +++ b/controllers/statuses.go @@ -0,0 +1,52 @@ +package controllers + +import ( + "context" + trustyaiopendatahubiov1alpha1 "github.com/trustyai-explainability/trustyai-service-operator/api/v1alpha1" + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +func (r *TrustyAIServiceReconciler) updateStatus(ctx context.Context, original *trustyaiopendatahubiov1alpha1.TrustyAIService, update func(saved *trustyaiopendatahubiov1alpha1.TrustyAIService), +) (*trustyaiopendatahubiov1alpha1.TrustyAIService, error) { + saved := &trustyaiopendatahubiov1alpha1.TrustyAIService{} + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + err := r.Client.Get(ctx, client.ObjectKeyFromObject(original), saved) + if err != nil { + return err + } + // update status here + update(saved) + + // Try to update + err = r.Client.Status().Update(ctx, saved) + return err + }) + if err != nil { + log.FromContext(ctx).Error(err, "Failed to update TrustyAIService status") + } + return saved, err +} + +func UpdateInferenceServiceNotPresent(saved *trustyaiopendatahubiov1alpha1.TrustyAIService) { + saved.SetStatus(StatusTypeInferenceServicesPresent, StatusReasonInferenceServicesNotFound, "InferenceServices not found", v1.ConditionFalse) + saved.Status.Phase = "Not Ready" + saved.Status.Ready = v1.ConditionFalse + +} + +func UpdateInferenceServicePresent(saved *trustyaiopendatahubiov1alpha1.TrustyAIService) { + saved.SetStatus(StatusTypeInferenceServicesPresent, StatusReasonInferenceServicesFound, "InferenceServices found", v1.ConditionTrue) +} + +func UpdatePVCNotAvailable(saved *trustyaiopendatahubiov1alpha1.TrustyAIService) { + saved.SetStatus(StatusTypePVCAvailable, StatusReasonPVCNotFound, "PersistentVolumeClaim not found", v1.ConditionFalse) + saved.Status.Phase = "Not Ready" + saved.Status.Ready = v1.ConditionFalse +} + +func UpdatePVCAvailable(saved *trustyaiopendatahubiov1alpha1.TrustyAIService) { + saved.SetStatus(StatusTypePVCAvailable, StatusReasonPVCFound, "PersistentVolumeClaim found", v1.ConditionTrue) +} diff --git a/controllers/suite_test.go b/controllers/suite_test.go index 0a78113e..7d24903d 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -23,6 +23,7 @@ import ( kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + routev1 "github.com/openshift/api/route/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" trustyaiopendatahubiov1alpha1 "github.com/trustyai-explainability/trustyai-service-operator/api/v1alpha1" appsv1 "k8s.io/api/apps/v1" @@ -64,11 +65,11 @@ func TestAPIs(t *testing.T) { RunSpecs(t, "Controller Suite") } -func createDefaultCR() *trustyaiopendatahubiov1alpha1.TrustyAIService { +func createDefaultCR(namespaceCurrent string) *trustyaiopendatahubiov1alpha1.TrustyAIService { service := trustyaiopendatahubiov1alpha1.TrustyAIService{ ObjectMeta: metav1.ObjectMeta{ Name: name, - Namespace: namespace, + Namespace: namespaceCurrent, }, Spec: trustyaiopendatahubiov1alpha1.TrustyAIServiceSpec{ Storage: trustyaiopendatahubiov1alpha1.StorageSpec{ @@ -142,6 +143,82 @@ func createMockPV(ctx context.Context, k8sClient client.Client, pvName string, s return nil } +func removeFinalizerAndDeleteInstance(ctx context.Context, k8sClient client.Client, instance *trustyaiopendatahubiov1alpha1.TrustyAIService, finalizerName string) { + // Get the latest state of the TrustyAIService instance + _ = k8sClient.Get(ctx, client.ObjectKey{Name: instance.Name, Namespace: instance.Namespace}, instance) + + // Remove the finalizer from the TrustyAIService instance + finalizerIndex := -1 + for i, f := range instance.Finalizers { + if f == finalizerName { + finalizerIndex = i + break + } + } + if finalizerIndex >= 0 { + instance.Finalizers = append(instance.Finalizers[:finalizerIndex], instance.Finalizers[finalizerIndex+1:]...) + _ = k8sClient.Update(ctx, instance) + } + + // Delete the TrustyAIService instance + _ = k8sClient.Delete(ctx, instance) +} + +// Function to create the InferenceService +func createInferenceService(name string, namespace string) *kservev1beta1.InferenceService { + return &kservev1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: kservev1beta1.InferenceServiceSpec{ + Predictor: kservev1beta1.PredictorSpec{ + Model: &kservev1beta1.ModelSpec{ + ModelFormat: kservev1beta1.ModelFormat{ + Name: "sklearn", + }, + }, + }, + }, + } +} + +func checkTrustyAIServiceCondition(client client.Client, instance *trustyaiopendatahubiov1alpha1.TrustyAIService, expectedType string, expectedStatus corev1.ConditionStatus, expectedReason string) error { + err := client.Get(context.TODO(), types.NamespacedName{ + Namespace: instance.Namespace, + Name: instance.Name, + }, instance) + + if err != nil { + return err + } + + for _, condition := range instance.Status.Conditions { + if condition.Type == expectedType && condition.Status == expectedStatus && condition.Reason == expectedReason { + return nil // Condition matches expectations + } + } + + return fmt.Errorf("Condition did not match expectations. Expected Type: %s, Status: %s, Reason: %s", expectedType, expectedStatus, expectedReason) +} + +func checkTrustyAIServiceReadyStatus(client client.Client, instance *trustyaiopendatahubiov1alpha1.TrustyAIService, expectedStatus corev1.ConditionStatus) error { + err := client.Get(context.TODO(), types.NamespacedName{ + Namespace: instance.Namespace, + Name: instance.Name, + }, instance) + + if err != nil { + return err + } + + if instance.Status.Ready == expectedStatus { + return nil // Ready status matches expectations + } + + return fmt.Errorf("Ready status did not match expectations. Expected: %s, Actual: %s", expectedStatus, instance.Status.Ready) +} + var _ = BeforeSuite(func() { logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) @@ -175,6 +252,10 @@ var _ = BeforeSuite(func() { err = kservev1beta1.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) + // Add Routes + err = routev1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + //+kubebuilder:scaffold:scheme k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) @@ -210,45 +291,108 @@ var _ = AfterSuite(func() { var _ = Describe("TrustyAI operator", func() { - Context("Testing deployment with defaults", func() { + Context("Testing deployment with defaults and no InferenceService", func() { var instance *trustyaiopendatahubiov1alpha1.TrustyAIService BeforeEach(func() { - instance = createDefaultCR() - Eventually(func() error { - return createNamespace(ctx, k8sClient, namespace) - }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to create namespace") - // Create a mock PV for the tests Eventually(func() error { return createMockPV(ctx, k8sClient, "mypv", "1Gi") }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to create PV") }) - AfterEach(func() { - // Delete the TrustyAIService instance - Expect(k8sClient.Delete(ctx, instance)).Should(Succeed()) + It("should deploy the service with defaults", func() { + ctx = context.Background() + thisNamespace := "trusty-ns-1" + instance = createDefaultCR(thisNamespace) + Eventually(func() error { + return createNamespace(ctx, k8sClient, thisNamespace) + }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to create namespace") - // Delete the namespace - namespaceObj := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}} - Expect(k8sClient.Delete(ctx, namespaceObj)).Should(Succeed()) + Expect(k8sClient.Create(ctx, instance)).Should(Succeed()) + deployment := &appsv1.Deployment{} + Eventually(func() error { + // Define name for the deployment created by the operator + namespacedNamed := types.NamespacedName{ + Namespace: thisNamespace, + Name: name, + } + return k8sClient.Get(ctx, namespacedNamed, deployment) + }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to get Deployment") + + Expect(*deployment.Spec.Replicas).Should(Equal(int32(1))) + Expect(deployment.Namespace).Should(Equal(thisNamespace)) + Expect(deployment.Name).Should(Equal(name)) + Expect(deployment.Labels["app"]).Should(Equal(name)) + Expect(deployment.Labels["app.kubernetes.io/name"]).Should(Equal(name)) + Expect(deployment.Labels["app.kubernetes.io/instance"]).Should(Equal(name)) + Expect(deployment.Labels["app.kubernetes.io/part-of"]).Should(Equal(componentName)) + Expect(deployment.Labels["app.kubernetes.io/version"]).Should(Equal("0.1.0")) + + Expect(deployment.Spec.Template.Spec.Containers[0].Image).Should(Equal("quay.io/trustyai/trustyai-service:latest")) + + service := &corev1.Service{} + Eventually(func() error { + return k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: thisNamespace}, service) + }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to get Service") + + Expect(service.Annotations["prometheus.io/path"]).Should(Equal("/q/metrics")) + Expect(service.Annotations["prometheus.io/scheme"]).Should(Equal("http")) + Expect(service.Annotations["prometheus.io/scrape"]).Should(Equal("true")) + Expect(service.Namespace).Should(Equal(thisNamespace)) + + Eventually(func() error { + return checkTrustyAIServiceCondition(k8sClient, instance, StatusTypePVCAvailable, corev1.ConditionTrue, StatusReasonPVCFound) + }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to get the correct condition") + + // No InferenceService deployed yet + Eventually(func() error { + return checkTrustyAIServiceCondition(k8sClient, instance, StatusTypeInferenceServicesPresent, corev1.ConditionFalse, StatusReasonInferenceServicesNotFound) + }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to get the correct condition") }) - It("should deploy the service with defaults", func() { + It("should deploy the service with defaults and one InferenceService", func() { ctx = context.Background() + thisNamespace := "trusty-ns-2" + instance = createDefaultCR(thisNamespace) + Eventually(func() error { + return createNamespace(ctx, k8sClient, thisNamespace) + }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to create namespace") + + // Creating the InferenceService + inferenceService := createInferenceService("my-model", thisNamespace) + Expect(k8sClient.Create(ctx, inferenceService)).Should(Succeed()) + + timeout := 10 * time.Second + interval := 1 * time.Second + + // Wait for the InferenceService to be ready + Eventually(func() bool { + err := k8sClient.Get(ctx, client.ObjectKey{ + Namespace: inferenceService.Namespace, + Name: inferenceService.Name, + }, inferenceService) + + if err != nil { + return false + } + + return true // return true when the InferenceService is ready + }, timeout, interval).Should(BeTrue()) + Expect(k8sClient.Create(ctx, instance)).Should(Succeed()) deployment := &appsv1.Deployment{} Eventually(func() error { // Define name for the deployment created by the operator namespacedNamed := types.NamespacedName{ - Namespace: namespace, + Namespace: thisNamespace, Name: name, } return k8sClient.Get(ctx, namespacedNamed, deployment) }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to get Deployment") Expect(*deployment.Spec.Replicas).Should(Equal(int32(1))) - Expect(deployment.Namespace).Should(Equal(namespace)) + Expect(deployment.Namespace).Should(Equal(thisNamespace)) Expect(deployment.Name).Should(Equal(name)) Expect(deployment.Labels["app"]).Should(Equal(name)) Expect(deployment.Labels["app.kubernetes.io/name"]).Should(Equal(name)) @@ -260,13 +404,22 @@ var _ = Describe("TrustyAI operator", func() { service := &corev1.Service{} Eventually(func() error { - return k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, service) + return k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: thisNamespace}, service) }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to get Service") Expect(service.Annotations["prometheus.io/path"]).Should(Equal("/q/metrics")) Expect(service.Annotations["prometheus.io/scheme"]).Should(Equal("http")) Expect(service.Annotations["prometheus.io/scrape"]).Should(Equal("true")) - Expect(service.Namespace).Should(Equal(namespace)) + Expect(service.Namespace).Should(Equal(thisNamespace)) + + Eventually(func() error { + return checkTrustyAIServiceCondition(k8sClient, instance, StatusTypePVCAvailable, corev1.ConditionTrue, StatusReasonPVCFound) + }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to get the correct condition") + + // No InferenceService deployed yet + Eventually(func() error { + return checkTrustyAIServiceCondition(k8sClient, instance, StatusTypeInferenceServicesPresent, corev1.ConditionTrue, StatusReasonInferenceServicesFound) + }, time.Second*20, time.Millisecond*250).Should(Succeed(), "failed to get the correct condition") }) @@ -279,11 +432,11 @@ var _ = Describe("TrustyAI operator", func() { BeforeEach(func() { instances = make([]*trustyaiopendatahubiov1alpha1.TrustyAIService, len(namespaces)) - for i, namespace := range namespaces { - instances[i] = createDefaultCR() - instances[i].Namespace = namespace + for i, thisNamespace := range namespaces { + instances[i] = createDefaultCR(thisNamespace) + instances[i].Namespace = thisNamespace Eventually(func() error { - return createNamespace(ctx, k8sClient, namespace) + return createNamespace(ctx, k8sClient, thisNamespace) }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to create namespace") } }) @@ -296,14 +449,14 @@ var _ = Describe("TrustyAI operator", func() { Eventually(func() error { // Define name for the deployment created by the operator namespacedNamed := types.NamespacedName{ - Namespace: namespace, + Namespace: instance.Namespace, Name: name, } return k8sClient.Get(ctx, namespacedNamed, deployment) }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to get Deployment") Expect(*deployment.Spec.Replicas).Should(Equal(int32(1))) - Expect(deployment.Namespace).Should(Equal(namespace)) + Expect(deployment.Namespace).Should(Equal(instance.Namespace)) Expect(deployment.Name).Should(Equal(name)) Expect(deployment.Labels["app"]).Should(Equal(name)) Expect(deployment.Labels["app.kubernetes.io/name"]).Should(Equal(name)) @@ -315,13 +468,12 @@ var _ = Describe("TrustyAI operator", func() { service := &corev1.Service{} Eventually(func() error { - return k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, service) + return k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: instance.Namespace}, service) }, time.Second*10, time.Millisecond*250).Should(Succeed(), "failed to get Service") Expect(service.Annotations["prometheus.io/path"]).Should(Equal("/q/metrics")) Expect(service.Annotations["prometheus.io/scheme"]).Should(Equal("http")) Expect(service.Annotations["prometheus.io/scrape"]).Should(Equal("true")) - Expect(service.Namespace).Should(Equal(namespace)) } }) diff --git a/controllers/trustyaiservice_controller.go b/controllers/trustyaiservice_controller.go index 4554d969..2a41c2f2 100644 --- a/controllers/trustyaiservice_controller.go +++ b/controllers/trustyaiservice_controller.go @@ -127,7 +127,7 @@ func (r *TrustyAIServiceReconciler) Reconcile(ctx context.Context, req ctrl.Requ // CR found, add or update the URL // Call the function to patch environment variables for Deployments that match the label - shouldContinue, err := r.handleInferenceServices(ctx, req.Namespace, modelMeshLabelKey, modelMeshLabelValue, payloadProcessorName, req.Name, false) + shouldContinue, err := r.handleInferenceServices(ctx, instance, req.Namespace, modelMeshLabelKey, modelMeshLabelValue, payloadProcessorName, req.Name, false) if err != nil { return RequeueWithErrorMessage(ctx, err, "Could not patch environment variables for Deployments.") } @@ -135,50 +135,23 @@ func (r *TrustyAIServiceReconciler) Reconcile(ctx context.Context, req ctrl.Requ return RequeueWithDelayMessage(ctx, time.Minute, "Not all replicas are ready, requeue the reconcile request") } - // Update the instance status to Not Ready - instance.Status.Phase = "Not Ready" - instance.Status.Ready = corev1.ConditionFalse - // Ensure PVC err = r.ensurePVC(ctx, instance) if err != nil { // PVC not found condition - pvcAvailableCondition := trustyaiopendatahubiov1alpha1.Condition{ - Type: "PVCAvailable", - Status: corev1.ConditionFalse, - Reason: "PVCNotFound", - Message: "PersistentVolumeClaim not found", - } log.FromContext(ctx).Error(err, "Error creating PVC storage.") - - // Set the condition - if err = r.setCondition(instance, pvcAvailableCondition); err != nil { - log.FromContext(ctx).Error(err, "Failed to set condition") - } - - // Update the instance status to Not Ready - instance.Status.Phase = "Not Ready" - instance.Status.Ready = corev1.ConditionFalse - - // Update the status subresource - if updateErr := r.Status().Update(ctx, instance); updateErr != nil { - log.FromContext(ctx).Error(updateErr, "Failed to update TrustyAIService status") + _, updateErr := r.updateStatus(ctx, instance, UpdatePVCNotAvailable) + if updateErr != nil { + return RequeueWithErrorMessage(ctx, err, "Failed to update status") } // If there was an error finding the PV, requeue the request return RequeueWithErrorMessage(ctx, err, "Could not find requested PersistentVolumeClaim.") } else { - // Set the conditions appropriately - pvcAvailableCondition := trustyaiopendatahubiov1alpha1.Condition{ - Type: "PVCAvailable", - Status: corev1.ConditionTrue, - Reason: "PVCFound", - Message: "PersistentVolumeClaim found", - } - - if setConditionErr := r.setCondition(instance, pvcAvailableCondition); setConditionErr != nil { - return RequeueWithErrorMessage(ctx, setConditionErr, "Failed to set condition") + _, updateErr := r.updateStatus(ctx, instance, UpdatePVCAvailable) + if updateErr != nil { + return RequeueWithErrorMessage(ctx, err, "Failed to update status") } } @@ -227,14 +200,14 @@ func (r *TrustyAIServiceReconciler) Reconcile(ctx context.Context, req ctrl.Requ if err != nil { return RequeueWithError(err) } - - // At the end of reconcile, update the instance status to Ready - instance.Status.Phase = "Ready" - instance.Status.Ready = corev1.ConditionTrue - - // Populate statuses - if err = r.reconcileStatuses(instance, ctx); err != nil { - return RequeueWithErrorMessage(ctx, err, "Error creating the statuses.") + + _, updateErr := r.updateStatus(ctx, instance, func(saved *trustyaiopendatahubiov1alpha1.TrustyAIService) { + // At the end of reconcile, update the instance status to Ready + saved.Status.Phase = "Ready" + saved.Status.Ready = corev1.ConditionTrue + }) + if updateErr != nil { + return RequeueWithErrorMessage(ctx, err, "Failed to update status") } // Deployment already exists - requeue the request with a delay @@ -300,64 +273,6 @@ func (r *TrustyAIServiceReconciler) SetupWithManager(mgr ctrl.Manager) error { Complete(r) } -func (r *TrustyAIServiceReconciler) reconcileStatuses(instance *trustyaiopendatahubiov1alpha1.TrustyAIService, ctx context.Context) error { - deploymentList := &appsv1.DeploymentList{} - labelSelector := client.MatchingLabels{"app.kubernetes.io/name": "modelmesh-controller"} - err := r.Client.List(ctx, deploymentList, client.InNamespace("modelmesh-serving"), labelSelector) - - var condition trustyaiopendatahubiov1alpha1.Condition - if err != nil { - log.FromContext(ctx).Error(err, "Error creating condition.") - return err - } - - if len(deploymentList.Items) == 0 { - // No deployments found with the given label - condition = trustyaiopendatahubiov1alpha1.Condition{ - Type: "ModelMeshReady", - Status: "False", - LastTransitionTime: metav1.Now(), - Reason: "ModelMeshNotPresent", - Message: "ModelMesh operator Deployment not found", - } - } else { - // We'll just check the first deployment found - deployment := &deploymentList.Items[0] - // The Deployment exists, check if it's ready - if isDeploymentReady(deployment) { - condition = trustyaiopendatahubiov1alpha1.Condition{ - Type: "ModelMeshReady", - Status: "True", - LastTransitionTime: metav1.Now(), - Reason: "ModelMeshHealthy", - Message: "ModelMesh operator is running and healthy", - } - } else { - condition = trustyaiopendatahubiov1alpha1.Condition{ - Type: "ModelMeshReady", - Status: "False", - LastTransitionTime: metav1.Now(), - Reason: "ModelMeshNotHealthy", - Message: "ModelMesh operator Deployment is not healthy", - } - } - } - - // Update the condition - if err = r.setCondition(instance, condition); err != nil { - log.FromContext(ctx).Error(err, "Failed to set condition") - return err - } - - // Update the status of the custom resource - err = r.Status().Update(ctx, instance) - if err != nil { - log.FromContext(ctx).Error(err, "Error updating conditions.") - return err - } - return nil -} - // getTrustyAIImageAndTagFromConfigMap gets a custom TrustyAI image and tag from a ConfigMap in the operator's namespace func (r *TrustyAIServiceReconciler) getImageFromConfigMap(ctx context.Context) (string, error) { if r.Namespace != "" { From 964c1c50bfc4cf7256096ded3569b6036a61cb10 Mon Sep 17 00:00:00 2001 From: Rui Vieira Date: Tue, 17 Oct 2023 09:44:52 +0100 Subject: [PATCH 2/2] feat: Add Route status (#125) --- controllers/constants.go | 3 +++ controllers/statuses.go | 8 ++++++++ controllers/trustyaiservice_controller.go | 11 +++++++++-- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/controllers/constants.go b/controllers/constants.go index ff680244..72a5533e 100644 --- a/controllers/constants.go +++ b/controllers/constants.go @@ -19,6 +19,7 @@ const ( const ( StatusTypeInferenceServicesPresent = "InferenceServicesPresent" StatusTypePVCAvailable = "PVCAvailable" + StatusTypeRouteAvailable = "RouteAvailable" ) // Status reasons @@ -27,4 +28,6 @@ const ( StatusReasonInferenceServicesFound = "InferenceServicesFound" StatusReasonPVCNotFound = "PVCNotFound" StatusReasonPVCFound = "PVCFound" + StatusReasonRouteNotFound = "RouteNotFound" + StatusReasonRouteFound = "RouteFound" ) diff --git a/controllers/statuses.go b/controllers/statuses.go index 10954237..79f4ad49 100644 --- a/controllers/statuses.go +++ b/controllers/statuses.go @@ -50,3 +50,11 @@ func UpdatePVCNotAvailable(saved *trustyaiopendatahubiov1alpha1.TrustyAIService) func UpdatePVCAvailable(saved *trustyaiopendatahubiov1alpha1.TrustyAIService) { saved.SetStatus(StatusTypePVCAvailable, StatusReasonPVCFound, "PersistentVolumeClaim found", v1.ConditionTrue) } + +func UpdateRouteAvailable(saved *trustyaiopendatahubiov1alpha1.TrustyAIService) { + saved.SetStatus(StatusTypeRouteAvailable, StatusReasonRouteFound, "Route found", v1.ConditionTrue) +} + +func UpdateRouteNotAvailable(saved *trustyaiopendatahubiov1alpha1.TrustyAIService) { + saved.SetStatus(StatusTypeRouteAvailable, StatusReasonRouteNotFound, "Route not found", v1.ConditionFalse) +} diff --git a/controllers/trustyaiservice_controller.go b/controllers/trustyaiservice_controller.go index 2a41c2f2..73cb5ee2 100644 --- a/controllers/trustyaiservice_controller.go +++ b/controllers/trustyaiservice_controller.go @@ -198,10 +198,17 @@ func (r *TrustyAIServiceReconciler) Reconcile(ctx context.Context, req ctrl.Requ // Create route err = r.reconcileRoute(instance, ctx) if err != nil { - return RequeueWithError(err) + // Could not create Route object, update status and return. + _, updateErr := r.updateStatus(ctx, instance, UpdateRouteNotAvailable) + if updateErr != nil { + return RequeueWithErrorMessage(ctx, err, "Failed to update status") + } + return RequeueWithErrorMessage(ctx, err, "Failed to get or create Route") } - + _, updateErr := r.updateStatus(ctx, instance, func(saved *trustyaiopendatahubiov1alpha1.TrustyAIService) { + // Set Route has available + UpdateRouteAvailable(saved) // At the end of reconcile, update the instance status to Ready saved.Status.Phase = "Ready" saved.Status.Ready = corev1.ConditionTrue