From 52848c25ce7b76617ac2ad2c9f3a1c53ab9cb61c Mon Sep 17 00:00:00 2001
From: Xingdong <xdlisjtu@gmail.com>
Date: Sun, 3 Nov 2024 20:00:45 +0800
Subject: [PATCH 1/9] Fault tests

---
 .github/workflows/fault.yml                   | 111 +++++
 api/Dockerfile                                |  12 +-
 .../managers/solution/solution-manager.go     |  22 +-
 .../v1alpha1/managers/stage/stage-manager.go  |   1 +
 .../stage/materialize/materialize.go          |   1 +
 api/pkg/apis/v1alpha1/vendors/job-vendor.go   |   3 +-
 .../apis/v1alpha1/vendors/solution-vendor.go  |   4 +
 api/pkg/apis/v1alpha1/vendors/stage-vendor.go |   1 +
 .../v1alpha2/providers/pubsub/redis/redis.go  |   1 +
 k8s/Dockerfile                                |  13 +-
 k8s/apis/fabric/v1/target_webhook.go          |   2 +
 .../fabric/target_polling_controller.go       |   1 -
 .../solution/instance_polling_controller.go   |   4 +-
 k8s/reconcilers/deployment.go                 |   9 +-
 test/integration/lib/testhelpers/helpers.go   |  18 +
 test/integration/lib/testhelpers/kubeutil.go  | 122 ++++++
 test/integration/magefile.go                  |  25 ++
 .../scenarios/faultTests/README.md            |   0
 .../scenarios/faultTests/constants.go         | 124 ++++++
 .../scenarios/faultTests/magefile.go          |  70 ++++
 .../update/manifestTemplates/instance.yaml    |  16 +
 .../manifestTemplates/solution-container.yaml |   5 +
 .../update/manifestTemplates/solution.yaml    |  13 +
 .../update/manifestTemplates/target.yaml      |  29 ++
 .../solution/update/verify/manifest_test.go   | 353 ++++++++++++++++
 .../materialize/manifest/activation.yaml      |   8 +
 .../manifest/campaign-container.yaml          |   5 +
 .../materialize/manifest/campaign.yaml        |  49 +++
 .../manifest/catalog-catalog-2.yaml           |  23 ++
 .../manifest/catalog-catalog-container-2.yaml |   5 +
 .../manifest/catalog-catalog-container.yaml   |   5 +
 .../materialize/manifest/catalog-catalog.yaml |  18 +
 .../manifest/instance-catalog-container.yaml  |   5 +
 .../manifest/instance-catalog.yaml            |  16 +
 .../manifest/solution-catalog-container.yaml  |   5 +
 .../manifest/solution-catalog.yaml            |  21 +
 .../manifest/target-catalog-container.yaml    |   5 +
 .../materialize/manifest/target-catalog.yaml  |  24 ++
 .../materialize/verify/manifest_test.go       | 389 ++++++++++++++++++
 test/localenv/magefile.go                     |  18 +
 40 files changed, 1544 insertions(+), 12 deletions(-)
 create mode 100644 .github/workflows/fault.yml
 create mode 100644 test/integration/scenarios/faultTests/README.md
 create mode 100644 test/integration/scenarios/faultTests/constants.go
 create mode 100644 test/integration/scenarios/faultTests/magefile.go
 create mode 100755 test/integration/scenarios/faultTests/solution/update/manifestTemplates/instance.yaml
 create mode 100644 test/integration/scenarios/faultTests/solution/update/manifestTemplates/solution-container.yaml
 create mode 100755 test/integration/scenarios/faultTests/solution/update/manifestTemplates/solution.yaml
 create mode 100755 test/integration/scenarios/faultTests/solution/update/manifestTemplates/target.yaml
 create mode 100644 test/integration/scenarios/faultTests/solution/update/verify/manifest_test.go
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/manifest/activation.yaml
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/manifest/campaign-container.yaml
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/manifest/campaign.yaml
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog-2.yaml
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog-container-2.yaml
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog-container.yaml
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog.yaml
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/manifest/instance-catalog-container.yaml
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/manifest/instance-catalog.yaml
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/manifest/solution-catalog-container.yaml
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/manifest/solution-catalog.yaml
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/manifest/target-catalog-container.yaml
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/manifest/target-catalog.yaml
 create mode 100644 test/integration/scenarios/faultTests/workflow/materialize/verify/manifest_test.go

diff --git a/.github/workflows/fault.yml b/.github/workflows/fault.yml
new file mode 100644
index 000000000..4f74cdabf
--- /dev/null
+++ b/.github/workflows/fault.yml
@@ -0,0 +1,111 @@
+# This workflow builds the fault-injection images, starts a minikube cluster, and runs the fault tests
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go
+
+name: fault
+
+on:
+  push:
+    branches:
+      - main
+      - 'release/**'
+  pull_request:
+    branches:
+      - main
+      - 'release/**'
+  workflow_dispatch:
+env:
+    ContainerRegistry: "ghcr.io"
+    ContainerRegistryRepo: "ghcr.io/eclipse-symphony"
+
+jobs:
+
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Set up Go
+      uses: actions/setup-go@v3
+      with:
+        go-version: 1.22.4
+        
+    - name: Set up custom GOPATH
+      run: |
+        mkdir -p /home/runner/go
+        echo "export GOPATH=/home/runner/go" >> $HOME/.bashrc
+        echo "export PATH=\$PATH:\$GOPATH/bin" >> $HOME/.bashrc
+        source $HOME/.bashrc
+
+    - name: Install make
+      run: sudo apt-get update && sudo apt-get install -y build-essential
+
+    - name: Check docker version and images
+      run: docker --version && docker images
+
+    - name: Install kubectl
+      run: |
+        curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
+        chmod +x kubectl
+        sudo mv ./kubectl /usr/local/bin/kubectl
+        kubectl version --client
+        kubectl config view
+
+    - name: Install Helm
+      run: |
+        curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
+        chmod 700 get_helm.sh
+        ./get_helm.sh
+
+    - name: Install minikube
+      run: | 
+        curl -Lo minikube https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
+        chmod +x minikube
+        sudo mv minikube /usr/local/bin/
+        minikube start
+        kubectl config view
+    
+    - name: Install Mage
+      run: |
+        cd ..
+        git clone https://github.com/magefile/mage
+        cd mage
+        go run bootstrap.go
+        cd ..
+
+    - name: Login to Docker Hub
+      uses: docker/login-action@v3
+      with:
+        registry: ${{ env.ContainerRegistry }}
+        username: ${{ github.repository_owner }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+
+    - name: Build docker images
+      run: |
+        cd test/localenv/
+        mage build:apifault
+        mage build:k8sfault
+        mage cluster:up
+    
+    - name: Go work init
+      run: |
+        mv go.work.bk go.work
+
+    - name: Run fault tests 
+      run: |
+        cd test/integration/scenarios/faultTests/ && mage faulttests
+
+    - name: Collect and upload symphony logs
+      uses: actions/upload-artifact@v4
+      with:
+        name: symphony-logs
+        path: |
+          /tmp/symphony-integration-test-logs/**/*.log
+      continue-on-error: true
+      if: always()
+        
+
+    
+
+    
+
+    
\ No newline at end of file
diff --git a/api/Dockerfile b/api/Dockerfile
index 2fcec060c..677ca76b1 100644
--- a/api/Dockerfile
+++ b/api/Dockerfile
@@ -13,6 +13,7 @@ ARG TARGETPLATFORM
 ARG BUILDPLATFORM
 ARG TARGETOS
 ARG TARGETARCH
+ARG FAULT_INJECTION_ENABLED=false
 
 ENV CGO_ENABLED=0
 
@@ -23,6 +24,14 @@ COPY ./api /workspace/api
 WORKDIR /workspace/api
 # File permissions are not preserved when copying files in ADO. 
 RUN chmod +x pkg/apis/v1alpha1/providers/target/script/mock-*.sh
+
+# Install gofail and activate failpoints (only when FAULT_INJECTION_ENABLED=true)
+RUN if [ "$FAULT_INJECTION_ENABLED" = "true" ]; then \
+    go install go.etcd.io/gofail@latest && \
+    find /workspace/api -type d | while read -r dir; do gofail enable "$dir"; done && \
+    find /workspace/coa -type d | while read -r dir; do gofail enable "$dir"; done && \
+    cd /workspace/api && go get go.etcd.io/gofail/runtime; \
+fi
 RUN CGO_ENABLED=${CGO_ENABLED} GOOS=${TARGETOS} GOARCH=${TARGETARCH} GODEBUG=netdns=cgo go build -o /dist/symphony-api
 
 FROM ${TARGET_BASE_IMAGE}
@@ -51,5 +60,6 @@ ADD ./api/symphony-api.json /
 EXPOSE 8080
 EXPOSE 8081
 ENV LOG_LEVEL=Debug
+ENV GOFAIL_HTTP "127.0.0.1:22381"
 # ENV CONFIG /symphony-api.json
-CMD sh -c 'if [ -f /etc/pki/ca-trust/source/anchors/proxy-cert.crt ]; then update-ca-trust; fi && exec /symphony-api -c $CONFIG -l $LOG_LEVEL'
+CMD sh -c 'if [ -f /etc/pki/ca-trust/source/anchors/proxy-cert.crt ]; then update-ca-trust; fi && exec /symphony-api -c $CONFIG -l $LOG_LEVEL'
\ No newline at end of file
diff --git a/api/pkg/apis/v1alpha1/managers/solution/solution-manager.go b/api/pkg/apis/v1alpha1/managers/solution/solution-manager.go
index 8b65648fb..f8aa5e8ed 100644
--- a/api/pkg/apis/v1alpha1/managers/solution/solution-manager.go
+++ b/api/pkg/apis/v1alpha1/managers/solution/solution-manager.go
@@ -12,6 +12,7 @@ import (
 	"errors"
 	"fmt"
 	"os"
+	"runtime/debug"
 	"strconv"
 	"strings"
 	"sync"
@@ -323,11 +324,16 @@ func (s *SolutionManager) Reconcile(ctx context.Context, deployment model.Deploy
 		return summary, err
 	}
 	defer func() {
-		log.DebugfCtx(ctx, " M (Solution): Reconcile conclude Summary. Namespace: %v, deployment instance: %v, summary message: %v", namespace, deployment.Instance, summary.SummaryMessage)
-		if deployment.IsDryRun {
-			summary.SuccessCount = 0
+		if r := recover(); r == nil {
+			log.DebugfCtx(ctx, " M (Solution): Reconcile conclude Summary. Namespace: %v, deployment instance: %v, summary message: %v", namespace, deployment.Instance, summary.SummaryMessage)
+			if deployment.IsDryRun {
+				summary.SuccessCount = 0
+			}
+			s.concludeSummary(ctx, deployment.Instance.ObjectMeta.Name, deployment.Generation, deployment.Hash, summary, namespace)
+		} else {
+			log.ErrorfCtx(ctx, " M (Solution): panic happens: %v\n%s", r, debug.Stack()) // %s: debug.Stack() returns []byte
+			panic(r)
 		}
-		s.concludeSummary(ctx, deployment.Instance.ObjectMeta.Name, deployment.Generation, deployment.Hash, summary, namespace)
 	}()
 
 	defer func() {
@@ -417,6 +423,8 @@ func (s *SolutionManager) Reconcile(ctx context.Context, deployment model.Deploy
 		return summary, err
 	}
 	log.DebugfCtx(ctx, " M (Solution): reconcile save summary progress: start deploy, total %v deployments", summary.PlannedDeployment)
+	// DO NOT REMOVE THIS COMMENT
+	// gofail: var beforeProviders string
 
 	plannedCount := 0
 	planSuccessCount := 0
@@ -546,6 +554,9 @@ func (s *SolutionManager) Reconcile(ctx context.Context, deployment model.Deploy
 
 	mergedState.ClearAllRemoved()
 
+	// DO NOT REMOVE THIS COMMENT
+	// gofail: var beforeDeploymentError string
+
 	if !deployment.IsDryRun {
 		if len(mergedState.TargetComponent) == 0 && remove {
 			log.DebugfCtx(ctx, " M (Solution): no assigned components to manage, deleting state")
@@ -577,6 +588,9 @@ func (s *SolutionManager) Reconcile(ctx context.Context, deployment model.Deploy
 		}
 	}
 
+	// DO NOT REMOVE THIS COMMENT
+	// gofail: var afterDeploymentError string
+
 	successCount := 0
 	for _, v := range targetResult {
 		successCount += v
diff --git a/api/pkg/apis/v1alpha1/managers/stage/stage-manager.go b/api/pkg/apis/v1alpha1/managers/stage/stage-manager.go
index b13791202..0ba93b44c 100644
--- a/api/pkg/apis/v1alpha1/managers/stage/stage-manager.go
+++ b/api/pkg/apis/v1alpha1/managers/stage/stage-manager.go
@@ -631,6 +631,7 @@ func (s *StageManager) HandleTriggerEvent(ctx context.Context, campaign model.Ca
 
 		waitGroup.Wait()
 		close(results)
+		// gofail: var afterProvider string
 
 		outputs := make(map[string]interface{})
 		delayedExit := false
diff --git a/api/pkg/apis/v1alpha1/providers/stage/materialize/materialize.go b/api/pkg/apis/v1alpha1/providers/stage/materialize/materialize.go
index d47c31b5c..16be7a9f2 100644
--- a/api/pkg/apis/v1alpha1/providers/stage/materialize/materialize.go
+++ b/api/pkg/apis/v1alpha1/providers/stage/materialize/materialize.go
@@ -553,6 +553,7 @@ func (i *MaterializeStageProvider) Process(ctx context.Context, mgrContext conte
 			}
 			createdObjectList[catalog.ObjectMeta.Name] = true
 		}
+		// gofail: var afterMaterializeOnce bool
 	}
 	if len(createdObjectList) < len(objects) {
 		errorMessage := "failed to create all objects:"
diff --git a/api/pkg/apis/v1alpha1/vendors/job-vendor.go b/api/pkg/apis/v1alpha1/vendors/job-vendor.go
index 06bf9048d..56cded851 100644
--- a/api/pkg/apis/v1alpha1/vendors/job-vendor.go
+++ b/api/pkg/apis/v1alpha1/vendors/job-vendor.go
@@ -74,8 +74,7 @@ func (e *JobVendor) Init(config vendors.VendorConfig, factories []managers.IMana
 			if err != nil && v1alpha2.IsDelayed(err) {
 				go e.Vendor.Context.Publish(topic, event)
 			}
-			// job reconciler already has a retry mechanism, return nil to avoid retrying
-			return nil
+			return err
 		},
 	})
 	e.Vendor.Context.Subscribe("heartbeat", v1alpha2.EventHandler{
diff --git a/api/pkg/apis/v1alpha1/vendors/solution-vendor.go b/api/pkg/apis/v1alpha1/vendors/solution-vendor.go
index b7c0688f9..90782fa23 100644
--- a/api/pkg/apis/v1alpha1/vendors/solution-vendor.go
+++ b/api/pkg/apis/v1alpha1/vendors/solution-vendor.go
@@ -132,6 +132,10 @@ func (c *SolutionVendor) onQueue(request v1alpha2.COARequest) v1alpha2.COARespon
 	case fasthttp.MethodPost:
 		ctx, span := observability.StartSpan("onQueue-POST", rContext, nil)
 		defer span.End()
+
+		// DO NOT REMOVE THIS COMMENT
+		// gofail: var onQueueError string
+
 		instance := request.Parameters["instance"]
 		delete := request.Parameters["delete"]
 		objectType := request.Parameters["objectType"]
diff --git a/api/pkg/apis/v1alpha1/vendors/stage-vendor.go b/api/pkg/apis/v1alpha1/vendors/stage-vendor.go
index 9d62e53eb..8af0f80fe 100644
--- a/api/pkg/apis/v1alpha1/vendors/stage-vendor.go
+++ b/api/pkg/apis/v1alpha1/vendors/stage-vendor.go
@@ -119,6 +119,7 @@ func (s *StageVendor) Init(config vendors.VendorConfig, factories []managers.IMa
 					Context: ctx,
 				})
 			}
+			// gofail: var afterPublishTrigger string
 			return nil
 		},
 		Group: "0",
diff --git a/coa/pkg/apis/v1alpha2/providers/pubsub/redis/redis.go b/coa/pkg/apis/v1alpha2/providers/pubsub/redis/redis.go
index fc492c852..358ed01ec 100644
--- a/coa/pkg/apis/v1alpha2/providers/pubsub/redis/redis.go
+++ b/coa/pkg/apis/v1alpha2/providers/pubsub/redis/redis.go
@@ -218,6 +218,7 @@ func (i *RedisPubSubProvider) pollNewMessages(topic string, handler v1alpha2.Eve
 	}()
 
 	for {
+		// gofail: var PollNewMessagesLoop string
 		if i.Ctx.Err() != nil {
 			return
 		}
diff --git a/k8s/Dockerfile b/k8s/Dockerfile
index 07570e435..407a541ee 100644
--- a/k8s/Dockerfile
+++ b/k8s/Dockerfile
@@ -23,6 +23,8 @@ ENV CGO_ENABLED=0
 
 ARG BUILD_BASE_IMAGE
 
+ARG FAULT_INJECTION_ENABLED=false
+
 # Install gcc, g++ and other necessary build tools
 RUN if echo "${BUILD_BASE_IMAGE}" | grep "alpine"; then \
         apk add --no-cache gcc musl-dev; \
@@ -50,11 +52,20 @@ RUN if echo "${BUILD_BASE_IMAGE}" | grep "mariner"; then \
         CGO_ENABLED=1 mage generate operatorTest; \
     fi
 
+# Install gofail and activate failpoints (only when FAULT_INJECTION_ENABLED=true)
+RUN if [ "$FAULT_INJECTION_ENABLED" = "true" ]; then \
+    go install go.etcd.io/gofail@latest && \
+    find /k8s -type d | while read -r dir; do gofail enable "$dir"; done && \
+    cd /k8s && go get go.etcd.io/gofail/runtime; \
+fi
+
 # Build
 RUN CGO_ENABLED=0 mage build
 FROM ${TARGET_BASE_IMAGE} AS manager
+RUN apk update && apk add curl
 WORKDIR /
 COPY --from=builder /k8s/bin/manager .
 USER 65532:65532
+ENV GOFAIL_HTTP="127.0.0.1:22381"
 
-ENTRYPOINT ["/manager"]
+ENTRYPOINT ["/manager"]
\ No newline at end of file
diff --git a/k8s/apis/fabric/v1/target_webhook.go b/k8s/apis/fabric/v1/target_webhook.go
index 526c01500..fd8b6e033 100644
--- a/k8s/apis/fabric/v1/target_webhook.go
+++ b/k8s/apis/fabric/v1/target_webhook.go
@@ -133,6 +133,8 @@ func (r *Target) ValidateCreate() (admission.Warnings, error) {
 	operationName := fmt.Sprintf("%s/%s", constants.TargetOperationNamePrefix, constants.ActivityOperation_Write)
 	ctx := configutils.PopulateActivityAndDiagnosticsContextFromAnnotations(r.GetNamespace(), resourceK8SId, r.Annotations, operationName, myTargetClient, context.TODO(), targetlog)
 
+	// gofail: var validateError error
+
 	diagnostic.InfoWithCtx(targetlog, ctx, "validate create", "name", r.Name, "namespace", r.Namespace)
 	observ_utils.EmitUserAuditsLogs(ctx, "Target %s is being created on namespace %s", r.Name, r.Namespace)
 
diff --git a/k8s/controllers/fabric/target_polling_controller.go b/k8s/controllers/fabric/target_polling_controller.go
index 30261c992..d737cd38d 100644
--- a/k8s/controllers/fabric/target_polling_controller.go
+++ b/k8s/controllers/fabric/target_polling_controller.go
@@ -39,7 +39,6 @@ type TargetPollingReconciler struct {
 func (r *TargetPollingReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
 	log := ctrllog.FromContext(ctx)
 	diagnostic.InfoWithCtx(log, ctx, "Reconcile Polling Target", "Name", req.Name, "Namespace", req.Namespace)
-
 	// Initialize reconcileTime for latency metrics
 	reconcileTime := time.Now()
 
diff --git a/k8s/controllers/solution/instance_polling_controller.go b/k8s/controllers/solution/instance_polling_controller.go
index 7d927d43c..434dde3cf 100644
--- a/k8s/controllers/solution/instance_polling_controller.go
+++ b/k8s/controllers/solution/instance_polling_controller.go
@@ -41,6 +41,8 @@ func (r *InstancePollingReconciler) Reconcile(ctx context.Context, req ctrl.Requ
 	log := ctrllog.FromContext(ctx)
 	log.Info("Reconcile Polling Instance " + req.Name + " in namespace " + req.Namespace)
 
+	// gofail: var beforePollingResult string
+
 	// Initialize reconcileTime for latency metrics
 	reconcileTime := time.Now()
 
@@ -85,7 +87,7 @@ func (r *InstancePollingReconciler) Reconcile(ctx context.Context, req ctrl.Requ
 		metrics.InstanceResourceType,
 		deploymentOperationType,
 	)
-
+	// gofail: var afterPollingResult string
 	return reconcileResult, err
 }
 
diff --git a/k8s/reconcilers/deployment.go b/k8s/reconcilers/deployment.go
index 906284e3a..45c041250 100644
--- a/k8s/reconcilers/deployment.go
+++ b/k8s/reconcilers/deployment.go
@@ -147,6 +147,8 @@ func (r *DeploymentReconciler) populateDiagnosticsAndActivitiesFromAnnotations(c
 
 // attemptUpdate attempts to update the instance
 func (r *DeploymentReconciler) AttemptUpdate(ctx context.Context, object Reconcilable, isRemoval bool, log logr.Logger, operationStartTimeKey string, operationName string) (metrics.OperationStatus, reconcile.Result, error) {
+	// gofail: var delayAttemptUpdate string
+
 	// populate diagnostics and activities from annotations
 	ctx = r.populateDiagnosticsAndActivitiesFromAnnotations(ctx, object, operationName, r.kubeClient, log)
 	if !controllerutil.ContainsFinalizer(object, r.finalizerName) && !isRemoval {
@@ -194,11 +196,12 @@ func (r *DeploymentReconciler) AttemptUpdate(ctx context.Context, object Reconci
 		diagnostic.ErrorWithCtx(log, ctx, err, "failed to update jobid")
 		return metrics.StatusUpdateFailed, ctrl.Result{}, err
 	}
-
+	// gofail: var beforeQueueJob string
 	if err := r.queueDeploymentJob(ctx, object, isRemoval, operationStartTimeKey); err != nil {
 		diagnostic.ErrorWithCtx(log, ctx, err, "failed to queue deployment job")
 		return r.handleDeploymentError(ctx, object, nil, isRemoval, reconciliationInterval, err, log)
 	}
+	// gofail: var afterQueueJob string
 
 	diagnostic.InfoWithCtx(log, ctx, "Updating object status with deployment queued")
 	if _, err := r.updateObjectStatus(ctx, object, nil, patchStatusOptions{deploymentQueued: true}, log); err != nil {
@@ -218,6 +221,8 @@ func (r *DeploymentReconciler) AttemptUpdate(ctx context.Context, object Reconci
 }
 
 func (r *DeploymentReconciler) PollingResult(ctx context.Context, object Reconcilable, isRemoval bool, log logr.Logger, operationStartTimeKey string, operationName string) (metrics.OperationStatus, reconcile.Result, error) {
+	// gofail: var delayBeforePolling string
+
 	// populate diagnostics and activities from annotations
 	ctx = r.populateDiagnosticsAndActivitiesFromAnnotations(ctx, object, operationName, r.kubeClient, log)
 	// Get reconciliation interval
@@ -511,7 +516,7 @@ func (r *DeploymentReconciler) updateObjectStatus(ctx context.Context, object Re
 	nextStatus.LastModified = metav1.Now()
 	object.SetStatus(*nextStatus)
 
-	err = r.kubeClient.Status().Update(context.Background(), object)
+	err = r.kubeClient.Status().Update(ctx, object)
 	if err != nil {
 		diagnostic.ErrorWithCtx(log, ctx, err, "failed to update object status")
 	}
diff --git a/test/integration/lib/testhelpers/helpers.go b/test/integration/lib/testhelpers/helpers.go
index 35eba063d..2744fefb3 100644
--- a/test/integration/lib/testhelpers/helpers.go
+++ b/test/integration/lib/testhelpers/helpers.go
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"github.com/eclipse-symphony/symphony/test/integration/lib/shell"
+	"github.com/princjef/mageutil/shellcmd"
 )
 
 func SetupClusterWithTunnel() (context.CancelFunc, int, error) {
@@ -124,3 +125,20 @@ func isProcessRunning(pid int) bool {
 	err = process.Signal(syscall.Signal(0))
 	return err == nil
 }
+
+// InjectPodFailure runs the shell command in $InjectCommand after waiting for the failpoint server of the pod selected by $InjectPodLabel; it does nothing if either is unset.
+func InjectPodFailure() error {
+	InjectCommand, InjectPodLabel := os.Getenv("InjectCommand"), os.Getenv("InjectPodLabel")
+	if InjectCommand == "" || InjectPodLabel == "" {
+		fmt.Println("InjectCommand is ", InjectCommand, "and InjectPodLabel is ", InjectPodLabel, ", skip error injection")
+		return nil
+	}
+	if err := WaitFailpointServer(InjectPodLabel); err != nil {
+		fmt.Println("Failpoint server not confirmed ready, injecting anyway: " + err.Error())
+	}
+	if err := shellcmd.Command(InjectCommand).Run(); err != nil {
+		return fmt.Errorf("failed to inject pod failure: %w", err)
+	}
+	fmt.Println("Injected fault")
+	return nil
+}
diff --git a/test/integration/lib/testhelpers/kubeutil.go b/test/integration/lib/testhelpers/kubeutil.go
index c51766365..990849c09 100644
--- a/test/integration/lib/testhelpers/kubeutil.go
+++ b/test/integration/lib/testhelpers/kubeutil.go
@@ -10,8 +10,12 @@ package testhelpers
 import (
 	"context"
 	"flag"
+	"fmt"
+	"net/http"
+	"os"
 	"path/filepath"
 	"sync"
+	"time"
 
 	corev1 "k8s.io/api/core/v1"
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
@@ -19,6 +23,8 @@ import (
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/rest"
 	"k8s.io/client-go/tools/clientcmd"
+	"k8s.io/client-go/tools/portforward"
+	"k8s.io/client-go/transport/spdy"
 	"k8s.io/client-go/util/homedir"
 )
 
@@ -97,3 +103,119 @@ func EnsureNamespace(namespace string) error {
 
 	return nil
 }
+
+// EnablePortForward forwards local port `port` to the same port on the first pod in the
+// "default" namespace matching podlabel; stopChan is passed to the forwarder. Returns once ready.
+func EnablePortForward(podlabel string, port string, stopChan chan struct{}) error {
+	config, err := RestConfig()
+	if err != nil {
+		return err
+	}
+	clientset, err := KubeClient()
+	if err != nil {
+		return err
+	}
+	podList, err := clientset.CoreV1().Pods("default").List(context.Background(), metav1.ListOptions{LabelSelector: podlabel})
+	if err != nil {
+		return err
+	}
+	if len(podList.Items) == 0 {
+		return fmt.Errorf("no pod found matching label %q", podlabel)
+	}
+	pod := podList.Items[0]
+
+	// Create a port-forward request
+	url := clientset.CoreV1().RESTClient().Post().
+		Resource("pods").
+		Namespace("default").
+		Name(pod.Name).
+		SubResource("portforward").
+		URL()
+	transport, upgrader, err := spdy.RoundTripperFor(config)
+	if err != nil {
+		return err
+	}
+
+	dialer := spdy.NewDialer(upgrader, &http.Client{Transport: transport}, "POST", url)
+	// Set up port-forwarding
+	ports := []string{fmt.Sprintf("%s:%s", port, port)}
+	readyChan := make(chan struct{})
+	forwarder, err := portforward.New(dialer, ports, stopChan, readyChan, os.Stdout, os.Stderr)
+	if err != nil {
+		return fmt.Errorf("creating port forwarder: %w", err)
+	}
+	// Buffered so the goroutine can finish even when nobody reads errCh any more.
+	errCh := make(chan error, 1)
+	go func() { errCh <- forwarder.ForwardPorts() }()
+
+	// Wait for the port-forwarding to be ready
+	select {
+	case <-readyChan:
+		fmt.Println("Port-forwarding is ready")
+		return nil
+	case <-time.After(time.Second * 10):
+		return fmt.Errorf("timeout waiting for port-forwarding to be ready")
+	case err = <-errCh:
+		return fmt.Errorf("forwarding ports: %v", err)
+	}
+}
+
+// WaitPodOnline polls up to 10 times, sleeping 10s after each attempt, until the first pod
+// in the "default" namespace matching podlabel reports phase Running.
+func WaitPodOnline(podlabel string) error {
+	client, err := KubeClient()
+	if err != nil {
+		return err
+	}
+	selector := metav1.ListOptions{LabelSelector: podlabel}
+	for attempt := 0; attempt < 10; attempt++ {
+		found, err := client.CoreV1().Pods("default").List(context.Background(), selector)
+		if err != nil {
+			return err
+		}
+		if len(found.Items) > 0 {
+			phase := found.Items[0].Status.Phase
+			if phase == corev1.PodRunning {
+				return nil
+			}
+			fmt.Println("pod not ready yet, waiting..." + phase)
+		}
+		time.Sleep(time.Second * 10)
+	}
+	return fmt.Errorf("timeout waiting for pod to be ready")
+}
+
+// WaitFailpointServer waits for the pod matching podlabel to be Running, then retries (up to 10
+// times, 10s apart) `kubectl exec <pod> -- curl localhost:22381` until the command succeeds.
+func WaitFailpointServer(podlabel string) error {
+	clientset, err := KubeClient()
+	if err != nil {
+		return err
+	}
+	if err = WaitPodOnline(podlabel); err != nil {
+		return err
+	}
+	pods := clientset.CoreV1().Pods("default")
+	for i := 0; i < 10; i++ {
+		podList, err := pods.List(context.Background(), metav1.ListOptions{
+			LabelSelector: podlabel,
+		})
+		if err != nil {
+			return err
+		}
+		if len(podList.Items) > 0 {
+			pod := podList.Items[0]
+			if pod.Status.Phase != corev1.PodRunning {
+				fmt.Println("pod not ready yet, waiting..." + pod.Status.Phase)
+			} else {
+				err = ShellExec(fmt.Sprintf("kubectl exec %s -- curl localhost:22381", pod.Name))
+				if err == nil {
+					return nil
+				}
+				fmt.Println("failed to connect to failpoint server, waiting...")
+			}
+		}
+		time.Sleep(time.Second * 10)
+	}
+	return fmt.Errorf("timeout waiting for failpoint server to be ready")
+}
diff --git a/test/integration/magefile.go b/test/integration/magefile.go
index 0af83b8b1..57ec80f83 100644
--- a/test/integration/magefile.go
+++ b/test/integration/magefile.go
@@ -119,3 +119,28 @@ func shellExec(cmd string) error {
 
 	return execCmd.Run()
 }
+
+// TestFault passes the absolute path of ./scenarios to listTests and runs each
+// returned entry with RunTest in order, stopping at the first error.
+func TestFault() error {
+	fmt.Println("Searching for integration tests")
+
+	root, err := filepath.Abs("scenarios")
+	if err != nil {
+		return err
+	}
+
+	discovered, err := listTests(root)
+	if err != nil {
+		return err
+	}
+
+	for _, path := range discovered {
+		fmt.Printf("Running tests in: %s\n", path)
+		if runErr := RunTest(path); runErr != nil {
+			return runErr
+		}
+	}
+
+	return nil
+}
diff --git a/test/integration/scenarios/faultTests/README.md b/test/integration/scenarios/faultTests/README.md
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/integration/scenarios/faultTests/constants.go b/test/integration/scenarios/faultTests/constants.go
new file mode 100644
index 000000000..74c253f27
--- /dev/null
+++ b/test/integration/scenarios/faultTests/constants.go
@@ -0,0 +1,124 @@
+//go:build mage
+
+/*
+ * Copyright (c) Microsoft Corporation.
+ * Licensed under the MIT license.
+ * SPDX-License-Identifier: MIT
+ */
+
+package main
+
+// Test config
+const (
+	TEST_TIMEOUT = "30m"
+)
+
+// FaultTestCase names one failpoint to trigger while a verify test package runs.
+type FaultTestCase struct {
+	testCase  string // go test package pattern handed to Verify
+	podLabel  string // label selector of the pod that is port-forwarded for injection
+	fault     string // failpoint name; becomes the URL path of the injection request
+	faultType string // body of the injection request, e.g. DefaultFaultType
+}
+
+var (
+	// TestCases maps a scenario name to the go test package pattern that verifies it.
+	TestCases = map[string]string{
+		"solutionUpdate":      "./solution/update/verify/...",
+		"workflowMaterialize": "./workflow/materialize/verify/...",
+	}
+
+	// PodLabels maps a component name to the label selector of its pod.
+	PodLabels = map[string]string{
+		"api": "app=symphony-api",
+		"k8s": "control-plane=symphony-controller-manager",
+	}
+	// Faults lists the fault cases in the order FaultTests runs them; each entry is unique.
+	Faults = []FaultTestCase{
+		{
+			testCase:  TestCases["solutionUpdate"],
+			podLabel:  PodLabels["api"],
+			fault:     "onQueueError",
+			faultType: DefaultFaultType,
+		},
+		{
+			testCase:  TestCases["solutionUpdate"],
+			podLabel:  PodLabels["api"],
+			fault:     "beforeProviders",
+			faultType: DefaultFaultType,
+		},
+		{
+			testCase:  TestCases["solutionUpdate"],
+			podLabel:  PodLabels["api"],
+			fault:     "beforeDeploymentError",
+			faultType: DefaultFaultType,
+		},
+		{
+			testCase:  TestCases["solutionUpdate"],
+			podLabel:  PodLabels["api"],
+			fault:     "afterDeploymentError",
+			faultType: DefaultFaultType,
+		},
+		{
+			testCase:  TestCases["solutionUpdate"],
+			podLabel:  PodLabels["api"],
+			fault:     "beforeConcludeSummary",
+			faultType: DefaultFaultType,
+		},
+		{
+			testCase:  TestCases["solutionUpdate"],
+			podLabel:  PodLabels["k8s"],
+			fault:     "beforePollingResult",
+			faultType: DefaultFaultType,
+		},
+		{
+			testCase:  TestCases["solutionUpdate"],
+			podLabel:  PodLabels["k8s"],
+			fault:     "afterPollingResult",
+			faultType: DefaultFaultType,
+		},
+		{
+			testCase:  TestCases["solutionUpdate"],
+			podLabel:  PodLabels["k8s"],
+			fault:     "beforeQueueJob",
+			faultType: DefaultFaultType,
+		},
+		{
+			testCase:  TestCases["solutionUpdate"],
+			podLabel:  PodLabels["k8s"],
+			fault:     "afterQueueJob",
+			faultType: DefaultFaultType,
+		},
+		{
+			testCase:  TestCases["workflowMaterialize"],
+			podLabel:  PodLabels["api"],
+			fault:     "afterMaterializeOnce",
+			faultType: DefaultFaultType,
+		},
+		{
+			testCase:  TestCases["workflowMaterialize"],
+			podLabel:  PodLabels["api"],
+			fault:     "afterProvider",
+			faultType: DefaultFaultType,
+		},
+		// afterPublishTrigger fault test cannot pass now because of dedup issue in activation
+		// {
+		// 	testCase:  TestCases["workflowMaterialize"],
+		// 	podLabel:  PodLabels["api"],
+		// 	fault:     "afterPublishTrigger",
+		// 	faultType: DefaultFaultType,
+		// },
+		{
+			testCase:  TestCases["workflowMaterialize"],
+			podLabel:  PodLabels["api"],
+			fault:     "afterRunTrigger",
+			faultType: DefaultFaultType,
+		},
+	}
+
+	// LocalPortForward is the port forwarded to the pod and addressed by the injection command.
+	LocalPortForward = "22381"
+
+	// DefaultFaultType is the failpoint action sent by default (gofail term syntax; presumably "always panic" — verify).
+	DefaultFaultType = "100.0%panic"
+)
diff --git a/test/integration/scenarios/faultTests/magefile.go b/test/integration/scenarios/faultTests/magefile.go
new file mode 100644
index 000000000..b65dc2521
--- /dev/null
+++ b/test/integration/scenarios/faultTests/magefile.go
@@ -0,0 +1,70 @@
+//go:build mage
+
+/*
+ * Copyright (c) Microsoft Corporation.
+ * Licensed under the MIT license.
+ * SPDX-License-Identifier: MIT
+ */
+
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/eclipse-symphony/symphony/test/integration/lib/testhelpers"
+	"github.com/princjef/mageutil/shellcmd"
+)
+
+// FaultTests is the mage target that runs every case in Faults sequentially
+// through FaultTestHelper, aborting on the first case that returns an error.
+func FaultTests() error {
+	fmt.Println("Running fault injection tests")
+
+	for i := range Faults {
+		if err := FaultTestHelper(Faults[i]); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// FaultTestHelper runs one fault case: it sets up the cluster, port-forwards
+// LocalPortForward to the pod selected by test.podLabel, exports the curl
+// injection command through the InjectCommand/InjectPodLabel environment
+// variables, and then runs the case's test package with Verify.
+func FaultTestHelper(test FaultTestCase) error {
+	testName := fmt.Sprintf("%s/%s/%s", test.testCase, test.fault, test.faultType)
+	fmt.Println("Running ", testName)
+
+	// Cleanup is deferred before setup, so it also runs when setup fails.
+	defer testhelpers.Cleanup(testName)
+	if err := testhelpers.SetupCluster(); err != nil {
+		return err
+	}
+
+	// stopChan is handed to the port-forwarder and closed when this function returns.
+	stopChan := make(chan struct{}, 1)
+	defer close(stopChan)
+	if err := testhelpers.EnablePortForward(test.podLabel, LocalPortForward, stopChan); err != nil {
+		return err
+	}
+
+	os.Setenv("InjectCommand", fmt.Sprintf("curl localhost:%s/%s -XPUT -d'%s'", LocalPortForward, test.fault, test.faultType))
+	os.Setenv("InjectPodLabel", test.podLabel)
+	return Verify(test.testCase)
+}
+
+// Verify clears the Go test cache and then runs `go test -v` against the
+// package pattern given in test, using TEST_TIMEOUT as the -timeout value.
+// The error of the first failing command is returned.
+func Verify(test string) error {
+	cleanCache := shellcmd.Command("go clean -testcache")
+	if err := cleanCache.Run(); err != nil {
+		return err
+	}
+
+	// test is interpolated verbatim into the command line.
+	goTest := shellcmd.Command(fmt.Sprintf("go test -v -timeout %s %s", TEST_TIMEOUT, test))
+	return goTest.Run()
+}
diff --git a/test/integration/scenarios/faultTests/solution/update/manifestTemplates/instance.yaml b/test/integration/scenarios/faultTests/solution/update/manifestTemplates/instance.yaml
new file mode 100755
index 000000000..0460afcae
--- /dev/null
+++ b/test/integration/scenarios/faultTests/solution/update/manifestTemplates/instance.yaml
@@ -0,0 +1,16 @@
+##
+## Copyright (c) Microsoft Corporation.
+## Licensed under the MIT license.
+## SPDX-License-Identifier: MIT
+##
+apiVersion: solution.symphony/v1
+kind: Instance
+metadata:
+  annotations: {}
+  name: faultupdateinstance
+spec:
+  displayName: faultupdateinstance
+  scope: test-scope
+  solution: mysol:v1
+  target:
+    name: faultupdatetarget
diff --git a/test/integration/scenarios/faultTests/solution/update/manifestTemplates/solution-container.yaml b/test/integration/scenarios/faultTests/solution/update/manifestTemplates/solution-container.yaml
new file mode 100644
index 000000000..fb8b517dd
--- /dev/null
+++ b/test/integration/scenarios/faultTests/solution/update/manifestTemplates/solution-container.yaml
@@ -0,0 +1,5 @@
+apiVersion: solution.symphony/v1
+kind: SolutionContainer
+metadata:
+  name: mysol
+spec:  
diff --git a/test/integration/scenarios/faultTests/solution/update/manifestTemplates/solution.yaml b/test/integration/scenarios/faultTests/solution/update/manifestTemplates/solution.yaml
new file mode 100755
index 000000000..cf3718b2a
--- /dev/null
+++ b/test/integration/scenarios/faultTests/solution/update/manifestTemplates/solution.yaml
@@ -0,0 +1,13 @@
+##
+## Copyright (c) Microsoft Corporation.
+## Licensed under the MIT license.
+## SPDX-License-Identifier: MIT
+##
+apiVersion: solution.symphony/v1
+kind: Solution
+metadata:
+  annotations: {}
+  name: mysol-v-v1
+spec:
+  rootResource: mysol
+  displayName: My solution
diff --git a/test/integration/scenarios/faultTests/solution/update/manifestTemplates/target.yaml b/test/integration/scenarios/faultTests/solution/update/manifestTemplates/target.yaml
new file mode 100755
index 000000000..ac12e52db
--- /dev/null
+++ b/test/integration/scenarios/faultTests/solution/update/manifestTemplates/target.yaml
@@ -0,0 +1,29 @@
+##
+## Copyright (c) Microsoft Corporation.
+## Licensed under the MIT license.
+## SPDX-License-Identifier: MIT
+##
+apiVersion: fabric.symphony/v1
+kind: Target
+metadata:
+  name: faultupdatetarget
+  annotations: {}
+spec:
+  displayName: faultupdatetarget
+  scope: test-scope
+  topologies:
+  - bindings:
+    - config:
+        inCluster: "true"
+        noWait: "false"
+        timeout: "5m"
+      provider: providers.target.k8s
+      role: instance
+    - config:
+        inCluster: "true"
+      provider: providers.target.helm
+      role: helm.v3
+    - config:
+        inCluster: "true"
+      provider: providers.target.kubectl
+      role: yaml.k8s
diff --git a/test/integration/scenarios/faultTests/solution/update/verify/manifest_test.go b/test/integration/scenarios/faultTests/solution/update/verify/manifest_test.go
new file mode 100644
index 000000000..84fccc650
--- /dev/null
+++ b/test/integration/scenarios/faultTests/solution/update/verify/manifest_test.go
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) Microsoft Corporation.
+ * Licensed under the MIT license.
+ * SPDX-License-Identifier: MIT
+ */
+
+package verify
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/eclipse-symphony/symphony/test/integration/lib/testhelpers"
+	"github.com/princjef/mageutil/shellcmd"
+	"github.com/stretchr/testify/require"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/client-go/dynamic"
+)
+
+const ()
+
+type (
+	TestCase struct {
+		// Name is a brief description of the test case
+		Name string
+
+		// Target is the kind of Symphony manifest to modify, e.g. solution/target
+		Target string
+
+		// ComponentsToAdd specifies the components to be added to the symphony manifest
+		ComponentsToAdd []string
+
+		// PodsToVerify specifies the pods that need to be running
+		PodsToVerify []string
+
+		// DeletedPodsToVerify specifies the pods that need to be deleted
+		DeletedPodsToVerify []string
+	}
+)
+
+var (
+	// manifestTemplateFolder holds the manifest templates, which carry no components, that every run starts from
+	manifestTemplateFolder = "../manifestTemplates"
+	// testManifestsFolder holds the temporary manifest files of a test run (set in .gitignore); the test removes it on exit
+	testManifestsFolder = "../manifestForTestingOnly"
+)
+
+var (
+	// Manifest templates; Scenario_Update applies the container manifest before the others
+	containerManifestTemplates = []string{
+		fmt.Sprintf("%s/solution-container.yaml", manifestTemplateFolder),
+	}
+
+	manifestTemplates = []string{
+		fmt.Sprintf("%s/target.yaml", manifestTemplateFolder),
+		fmt.Sprintf("%s/solution.yaml", manifestTemplateFolder),
+		fmt.Sprintf("%s/instance.yaml", manifestTemplateFolder),
+	}
+
+	// Generated manifests to deploy; a file that does not exist for a test case is skipped
+	testManifests = []string{
+		fmt.Sprintf("%s/target.yaml", testManifestsFolder),
+		fmt.Sprintf("%s/solution.yaml", testManifestsFolder),
+		fmt.Sprintf("%s/instance.yaml", testManifestsFolder),
+	}
+
+	testCases = []TestCase{ // run in order; each case starts from the cluster state left by the previous one
+		{
+			Name:                "Initial Symphony Target Deployment with nginx ingress",
+			Target:              "target",
+			ComponentsToAdd:     []string{"nginx-ingress"},
+			PodsToVerify:        []string{"proxy-nginx-ingress-controller"},
+			DeletedPodsToVerify: []string{},
+		},
+		{
+			Name:                "Update Symphony Target to add redis",
+			Target:              "target",
+			ComponentsToAdd:     []string{"nginx-ingress", "redis"},
+			PodsToVerify:        []string{"proxy-nginx-ingress-controller", "target-runtime-faultupdatetarget"},
+			DeletedPodsToVerify: []string{},
+		},
+		{
+			Name:                "Update Symphony Solution to add bitnami nginx",
+			Target:              "solution",
+			ComponentsToAdd:     []string{"bitnami-nginx"},
+			PodsToVerify:        []string{"proxy-nginx-ingress-controller", "target-runtime-faultupdatetarget", "nginx"},
+			DeletedPodsToVerify: []string{},
+		},
+		{
+			Name:                "Update Symphony Solution to remove bitnami nginx and add prometheus",
+			Target:              "solution",
+			ComponentsToAdd:     []string{"prometheus-server"},
+			PodsToVerify:        []string{"proxy-nginx-ingress-controller", "target-runtime-faultupdatetarget", "faultupdateinstance"},
+			DeletedPodsToVerify: []string{"nginx"},
+		},
+		{
+			Name:                "Update Symphony Target to remove nginx ingress and redis",
+			Target:              "target",
+			ComponentsToAdd:     []string{},
+			PodsToVerify:        []string{},
+			DeletedPodsToVerify: []string{"proxy-nginx-ingress-controller", "target-runtime-faultupdatetarget"},
+		},
+	}
+)
+
+func TestScenario_Update_AllNamespaces(t *testing.T) {
+	namespace := "nondefault"
+	defer shellcmd.Command(fmt.Sprintf("rm -rf %s", testManifestsFolder)).Run() // best-effort removal of the generated manifests; the error is dropped
+	if namespace != "default" {
+		// Create the non-default namespace if it does not exist
+		err := shellcmd.Command(fmt.Sprintf("kubectl get namespace %s", namespace)).Run()
+		if err != nil {
+			// Better to check the err message here, but the command only returns "exit status 1" for a non-existing namespace
+			err = shellcmd.Command(fmt.Sprintf("kubectl create namespace %s", namespace)).Run()
+			require.NoError(t, err)
+		}
+	}
+	Scenario_Update(t, namespace)
+}
+
+func Scenario_Update(t *testing.T, namespace string) {
+	// Deploy base manifests: the solution container first, then the unmodified templates
+	for _, manifest := range containerManifestTemplates {
+		fullPath, err := filepath.Abs(manifest)
+		require.NoError(t, err)
+		err = shellcmd.Command(fmt.Sprintf("kubectl apply -f %s -n %s", fullPath, namespace)).Run()
+		require.NoError(t, err)
+	}
+	for _, manifest := range manifestTemplates {
+		fullPath, err := filepath.Abs(manifest)
+		require.NoError(t, err)
+		err = shellcmd.Command(fmt.Sprintf("kubectl apply -f %s -n %s", fullPath, namespace)).Run()
+		require.NoError(t, err)
+	}
+	for _, test := range testCases {
+		fmt.Printf("[Test case]: %s\n", test.Name)
+
+		err := testhelpers.InjectPodFailure()
+		require.NoError(t, err)
+
+		// Construct the manifests for this case from the templates, test.Target and test.ComponentsToAdd
+		err = testhelpers.BuildManifestFile(
+			manifestTemplateFolder, testManifestsFolder, test.Target, test.ComponentsToAdd)
+		require.NoError(t, err)
+
+		// Deploy the modified manifests
+		for _, manifest := range testManifests {
+			fullPath, err := filepath.Abs(manifest)
+			require.NoError(t, err)
+			// Skip manifests that were not generated for this case. This exercises the instance Watch logic,
+			// i.e. target and solution changes alone should trigger the instance reconciler
+			if _, err := os.Stat(fullPath); os.IsNotExist(err) {
+				continue
+			}
+
+			for i := 0; i < 10; i++ { // up to 10 attempts, 5 seconds apart
+				err = shellcmd.Command(fmt.Sprintf("kubectl apply -f %s -n %s", fullPath, namespace)).Run()
+				if err == nil {
+					break
+				}
+				time.Sleep(5 * time.Second)
+			}
+			require.NoError(t, err)
+		}
+		// Sleep for 10 seconds to hit the failure point
+		time.Sleep(10 * time.Second)
+
+		verifyTargetStatus(t, test, namespace)
+		verifyInstanceStatus(t, test, namespace)
+		verifyPodsExist(t, test, test.PodsToVerify)
+		verifyPodsDeleted(t, test, test.DeletedPodsToVerify)
+	}
+}
+
+// verifyTargetStatus polls the targets in namespace every 30 seconds until the
+// single expected target reports the provisioning status "Succeeded".
+//
+// The test fails at once if listing fails, if the namespace does not hold exactly
+// one target, or if the target reports "Failed". The loop has no deadline of its
+// own, so a target that never succeeds blocks until the go test timeout fires.
+func verifyTargetStatus(t *testing.T, test TestCase, namespace string) {
+	cfg, err := testhelpers.RestConfig()
+	require.NoError(t, err)
+
+	dyn, err := dynamic.NewForConfig(cfg)
+	require.NoError(t, err)
+
+	// Build the resource client once; only the List call is repeated.
+	targets := dyn.Resource(schema.GroupVersionResource{
+		Group:    "fabric.symphony",
+		Version:  "v1",
+		Resource: "targets",
+	}).Namespace(namespace)
+
+	for {
+		resources, err := targets.List(context.Background(), metav1.ListOptions{})
+		require.NoError(t, err)
+
+		require.Len(t, resources.Items, 1, "there should be only one target")
+
+		status := getStatus(resources.Items[0])
+		fmt.Printf("Current target status: %s\n", status)
+		require.NotEqual(t, "Failed", status, fmt.Sprintf("%s: Target should not be in failed state", test.Name))
+		if status == "Succeeded" {
+			break
+		}
+
+		// Not settled yet; poll again.
+		time.Sleep(30 * time.Second)
+	}
+}
+
+// verifyInstanceStatus waits, polling every 30 seconds, until the single instance
+// in namespace reports "Succeeded"; a "Failed" status fails the test at once.
+func verifyInstanceStatus(t *testing.T, test TestCase, namespace string) {
+	restCfg, err := testhelpers.RestConfig()
+	require.NoError(t, err)
+
+	client, err := dynamic.NewForConfig(restCfg)
+	require.NoError(t, err)
+
+	instanceGVR := schema.GroupVersionResource{
+		Group:    "solution.symphony",
+		Version:  "v1",
+		Resource: "instances",
+	}
+	for {
+		list, err := client.Resource(instanceGVR).Namespace(namespace).List(context.Background(), metav1.ListOptions{})
+		require.NoError(t, err)
+
+		require.Len(t, list.Items, 1, "there should be only one instance")
+
+		current := getStatus(list.Items[0])
+		fmt.Printf("Current instance status: %s\n", current)
+		require.NotEqual(t, "Failed", current, fmt.Sprintf("%s: Instance should not be in failed state", test.Name))
+		if current == "Succeeded" {
+			return
+		}
+
+		time.Sleep(30 * time.Second)
+	}
+}
+
+// Verify that the pods we expect are running in the "test-scope" namespace.
+// Lists the pods every 5 seconds until each string in toFind is contained in
+// the name of a pod in phase "Running". The test parameter is not used.
+func verifyPodsExist(t *testing.T, test TestCase, toFind []string) {
+	// Get kube client
+	kubeClient, err := testhelpers.KubeClient()
+	require.NoError(t, err)
+
+	i := 0
+	for {
+		i++
+		// List all pods in the hard-coded "test-scope" namespace (the scope set in the target and instance manifests)
+		pods, err := kubeClient.CoreV1().Pods("test-scope").List(context.Background(), metav1.ListOptions{})
+		require.NoError(t, err)
+
+		// Collect every expected name fragment that has no running pod yet
+		notFound := make(map[string]bool)
+		for _, s := range toFind {
+			found := false
+			for _, pod := range pods.Items {
+				if strings.Contains(pod.Name, s) && pod.Status.Phase == "Running" {
+					found = true
+					break
+				}
+			}
+
+			if !found {
+				notFound[s] = true
+			}
+		}
+
+		if len(notFound) == 0 {
+			fmt.Println("All pods found!")
+			break
+		} else {
+			time.Sleep(time.Second * 5)
+
+			if i%12 == 0 { // every 12th poll, i.e. about once a minute
+				fmt.Printf("Waiting for pods: %v\n", notFound)
+			}
+		}
+	}
+}
+
+// Verify that the pods we expect are deleted from the "test-scope" namespace.
+// Lists the pods every 5 seconds until no pod name starts with any of the
+// strings in toFind. The test parameter is not used.
+func verifyPodsDeleted(t *testing.T, test TestCase, toFind []string) {
+	// Get kube client
+	kubeClient, err := testhelpers.KubeClient()
+	require.NoError(t, err)
+
+	i := 0
+	for {
+		i++
+		// List all pods in the hard-coded "test-scope" namespace (the scope set in the target and instance manifests)
+		pods, err := kubeClient.CoreV1().Pods("test-scope").List(context.Background(), metav1.ListOptions{})
+		require.NoError(t, err)
+
+		// Collect every prefix that still matches a pod; unlike verifyPodsExist this is a prefix match
+		waitingForDeletion := make(map[string]bool)
+		for _, s := range toFind {
+			found := false
+			for _, pod := range pods.Items {
+				if strings.HasPrefix(pod.Name, s) {
+					found = true
+					break
+				}
+			}
+
+			if found {
+				waitingForDeletion[s] = true
+			}
+		}
+
+		if len(waitingForDeletion) == 0 {
+			fmt.Println("All pods deleted!")
+			break
+		} else {
+			time.Sleep(time.Second * 5)
+
+			if i%12 == 0 { // every 12th poll, i.e. about once a minute
+				fmt.Printf("Waiting for pods to be deleted: %v\n", waitingForDeletion)
+			}
+		}
+	}
+}
+
+// getStatus extracts status.provisioningStatus.status from the resource.
+// It returns "" when a level of that path is absent or has another type.
+func getStatus(resource unstructured.Unstructured) string {
+	statusField, ok := resource.Object["status"].(map[string]interface{})
+	if !ok {
+		return ""
+	}
+	provisioning, ok := statusField["provisioningStatus"].(map[string]interface{})
+	if !ok {
+		return ""
+	}
+	// A failed assertion leaves value as "", which is the fallback result.
+	value, _ := provisioning["status"].(string)
+	return value
+}
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/manifest/activation.yaml b/test/integration/scenarios/faultTests/workflow/materialize/manifest/activation.yaml
new file mode 100644
index 000000000..35e642929
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/manifest/activation.yaml
@@ -0,0 +1,8 @@
+apiVersion: workflow.symphony/v1
+kind: Activation
+metadata:
+  name: 04workflow
+spec:
+  campaign: "04campaign:v1"
+  inputs:
+    namesOnly: true
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/manifest/campaign-container.yaml b/test/integration/scenarios/faultTests/workflow/materialize/manifest/campaign-container.yaml
new file mode 100644
index 000000000..db6640d0f
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/manifest/campaign-container.yaml
@@ -0,0 +1,5 @@
+apiVersion: workflow.symphony/v1
+kind: CampaignContainer
+metadata:
+  name: 04campaign
+spec:  
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/manifest/campaign.yaml b/test/integration/scenarios/faultTests/workflow/materialize/manifest/campaign.yaml
new file mode 100644
index 000000000..165421f1c
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/manifest/campaign.yaml
@@ -0,0 +1,49 @@
+apiVersion: workflow.symphony/v1
+kind: Campaign
+metadata:
+  name: 04campaign-v-v1
+spec:  
+  rootResource: 04campaign
+  firstStage: wait
+  stages:
+    wait:
+      name: wait
+      provider: providers.stage.wait
+      stageSelector: list
+      config:
+        baseUrl: http://symphony-service:8080/v1alpha2/
+        user: admin
+        password: ""
+      inputs:
+        objectType: catalogs
+        names:
+        - sitecatalog:v1
+        - sitecatalog2:v1
+        - siteapp:v1
+        - sitek8starget:v1
+        - siteinstance:v1
+    list:
+      name: list
+      provider: providers.stage.list
+      stageSelector: deploy
+      config:
+        baseUrl: http://symphony-service:8080/v1alpha2/
+        user: admin
+        password: ""
+      inputs:
+        objectType: catalogs
+        namesOnly: "${{$trigger(namesOnly,false)}}"
+    deploy:
+      name: deploy
+      provider: providers.stage.materialize
+      stageSelector: ""
+      schedule: "2020-10-31T12:00:00-07:00"
+      config:
+        baseUrl: http://symphony-service:8080/v1alpha2/
+        user: admin
+        password: ""
+        waitForDeployment: true
+        WaitTimeout: 10m
+      inputs:
+        names: "${{$output(list,items)}}"
+  selfDriving: true
\ No newline at end of file
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog-2.yaml b/test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog-2.yaml
new file mode 100644
index 000000000..4982741f9
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog-2.yaml
@@ -0,0 +1,23 @@
+apiVersion: federation.symphony/v1
+kind: Catalog
+metadata:
+  name: sitecatalog2-v-v1
+spec:
+  rootResource: sitecatalog2
+  catalogType: catalog
+  properties:
+    metadata: 
+      name: webappconfig2:v1
+    spec: 
+      type: config
+      properties:
+        test.dot.serviceType: "NodePort"
+        testA:
+          testB:
+            images:
+              - image: "ghcr.io/eclipse-symphony/sample-flask-app:latest"
+                ver: 1
+              - image: "ghcr.io/eclipse-symphony/sample-flask-app:latest"
+                ver: 2
+              - image: "ghcr.io/eclipse-symphony/sample-flask-app:latest"
+                ver: 3
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog-container-2.yaml b/test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog-container-2.yaml
new file mode 100644
index 000000000..af9fdec9d
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog-container-2.yaml
@@ -0,0 +1,5 @@
+apiVersion: federation.symphony/v1
+kind: CatalogContainer
+metadata:
+  name: sitecatalog2
+spec:  
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog-container.yaml b/test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog-container.yaml
new file mode 100644
index 000000000..5ffa8ecb0
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog-container.yaml
@@ -0,0 +1,5 @@
+apiVersion: federation.symphony/v1
+kind: CatalogContainer
+metadata:
+  name: sitecatalog
+spec:  
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog.yaml b/test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog.yaml
new file mode 100644
index 000000000..dfc2f2049
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/manifest/catalog-catalog.yaml
@@ -0,0 +1,18 @@
+apiVersion: federation.symphony/v1
+kind: Catalog
+metadata:
+  name: sitecatalog-v-v1
+spec:
+  rootResource: sitecatalog
+  catalogType: catalog
+  properties:
+    metadata: 
+      name: webappconfig:v1
+    spec: 
+      type: config
+      properties:
+        testA:
+          testB:
+            testC:
+              serviceType: "${{$config('webappconfig2:v1','test.dot.serviceType')}}"
+              image: "${{$config('webappconfig2:v1','`.testA.testB.images[] | select(.ver > 2) | .image`')}}"
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/manifest/instance-catalog-container.yaml b/test/integration/scenarios/faultTests/workflow/materialize/manifest/instance-catalog-container.yaml
new file mode 100644
index 000000000..c20f7fdab
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/manifest/instance-catalog-container.yaml
@@ -0,0 +1,5 @@
+apiVersion: federation.symphony/v1
+kind: CatalogContainer
+metadata:
+  name: siteinstance
+spec:  
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/manifest/instance-catalog.yaml b/test/integration/scenarios/faultTests/workflow/materialize/manifest/instance-catalog.yaml
new file mode 100644
index 000000000..04ed8f948
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/manifest/instance-catalog.yaml
@@ -0,0 +1,16 @@
+apiVersion: federation.symphony/v1
+kind: Catalog
+metadata:
+  name: siteinstance-v-v1
+spec:
+  rootResource: siteinstance
+  catalogType: instance
+  properties:
+    metadata:
+      name: siteinstance
+    spec: 
+      solution: siteapp:v1
+      scope: nondefault
+      target:
+        selector:
+          group: site
\ No newline at end of file
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/manifest/solution-catalog-container.yaml b/test/integration/scenarios/faultTests/workflow/materialize/manifest/solution-catalog-container.yaml
new file mode 100644
index 000000000..97e180af3
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/manifest/solution-catalog-container.yaml
@@ -0,0 +1,5 @@
+apiVersion: federation.symphony/v1
+kind: CatalogContainer
+metadata:
+  name: siteapp
+spec:  
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/manifest/solution-catalog.yaml b/test/integration/scenarios/faultTests/workflow/materialize/manifest/solution-catalog.yaml
new file mode 100644
index 000000000..ff8c38478
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/manifest/solution-catalog.yaml
@@ -0,0 +1,21 @@
+apiVersion: federation.symphony/v1
+kind: Catalog
+metadata:
+  name: siteapp-v-v1
+spec:
+  rootResource: siteapp
+  catalogType: solution
+  properties:
+    metadata:
+      name: siteapp:v1
+    spec:   
+      components:
+      - name: web-app
+        type: container
+        metadata:
+          service.ports: "[{\"name\":\"port3011\",\"port\": 3011,\"targetPort\":5000}]"
+          service.type: "${{$config('webappconfig:v1','`.testA.testB.testC.serviceType`')}}"
+        properties:
+          deployment.replicas: "#1"
+          container.ports: "[{\"containerPort\":5000,\"protocol\":\"TCP\"}]"
+          container.image: "${{$config('webappconfig:v1','`.testA.testB.testC.image`')}}"
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/manifest/target-catalog-container.yaml b/test/integration/scenarios/faultTests/workflow/materialize/manifest/target-catalog-container.yaml
new file mode 100644
index 000000000..63a1851f3
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/manifest/target-catalog-container.yaml
@@ -0,0 +1,5 @@
+apiVersion: federation.symphony/v1
+kind: CatalogContainer
+metadata:
+  name: sitek8starget
+spec:  
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/manifest/target-catalog.yaml b/test/integration/scenarios/faultTests/workflow/materialize/manifest/target-catalog.yaml
new file mode 100644
index 000000000..44f96306d
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/manifest/target-catalog.yaml
@@ -0,0 +1,24 @@
+apiVersion: federation.symphony/v1
+kind: Catalog
+metadata:
+  name: sitek8starget-v-v1
+spec:
+  rootResource: sitek8starget
+  catalogType: target
+  properties:
+    metadata:
+      name: sitek8starget
+    spec:
+      properties:
+        group: site
+      topologies:
+      - bindings:        
+        - role: yaml.k8s
+          provider: providers.target.kubectl
+          config:
+            inCluster: "true"
+        - role: instance
+          provider: providers.target.k8s
+          config:
+            inCluster: "true"   
+            deploymentStrategy: "services"
\ No newline at end of file
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/verify/manifest_test.go b/test/integration/scenarios/faultTests/workflow/materialize/verify/manifest_test.go
new file mode 100644
index 000000000..c6a5c0e8e
--- /dev/null
+++ b/test/integration/scenarios/faultTests/workflow/materialize/verify/manifest_test.go
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) Microsoft Corporation.
+ * Licensed under the MIT license.
+ * SPDX-License-Identifier: MIT
+ */
+
+package verify
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/model"
+	"github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2"
+	"github.com/eclipse-symphony/symphony/test/integration/lib/testhelpers"
+	"github.com/princjef/mageutil/shellcmd"
+	"github.com/stretchr/testify/require"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/client-go/dynamic"
+)
+
+var (
+	testCatalogs = []string{ // applied in this order: the containers first, then the catalogs whose rootResource names them
+		"./manifest/catalog-catalog-container.yaml",
+		"./manifest/catalog-catalog-container-2.yaml",
+		"./manifest/instance-catalog-container.yaml",
+		"./manifest/solution-catalog-container.yaml",
+		"./manifest/target-catalog-container.yaml",
+
+		"./manifest/catalog-catalog.yaml",
+		"./manifest/catalog-catalog-2.yaml",
+		"./manifest/instance-catalog.yaml",
+		"./manifest/solution-catalog.yaml",
+		"./manifest/target-catalog.yaml",
+	}
+
+	testCampaign = []string{ // applied after the catalogs: the campaign container, then the campaign
+		"./manifest/campaign-container.yaml",
+		"./manifest/campaign.yaml",
+	}
+
+	testActivations = []string{ // applied last, after a 5 second pause in DeployManifests
+		"./manifest/activation.yaml",
+	}
+)
+
+// TestMaterializeWorkflow injects the configured fault, deploys the manifests (failing fast on error) and checks each resource.
+func TestMaterializeWorkflow(t *testing.T) {
+	namespace := "nondefault"
+	require.NoError(t, testhelpers.InjectPodFailure())
+	require.NoError(t, DeployManifests(namespace))
+	CheckCatalogs(t, namespace)
+	CheckCampaign(t, namespace)
+	CheckActivationStatus(t, namespace)
+	CheckTargetStatus(t, namespace)
+	CheckInstanceStatus(t, namespace)
+	VerifyPodsExist(t, namespace)
+}
+
+func DeployManifests(namespace string) error {
+	repoPath := "../" // joined with the manifest paths below, giving ../manifest/<file> relative to the working directory
+	if namespace != "default" {
+		// Create the non-default namespace if it does not exist
+		err := shellcmd.Command(fmt.Sprintf("kubectl get namespace %s", namespace)).Run()
+		if err != nil {
+			// Better to check the err message here, but the command only returns "exit status 1" for a non-existing namespace
+			err = shellcmd.Command(fmt.Sprintf("kubectl create namespace %s", namespace)).Run()
+			if err != nil {
+				return err
+			}
+		}
+	}
+	// Deploy the catalogs in testCatalogs order; the first failing apply aborts the deployment
+	for _, catalog := range testCatalogs {
+		absCatalog := filepath.Join(repoPath, catalog)
+		err := shellcmd.Command(fmt.Sprintf("kubectl apply -f %s -n %s", absCatalog, namespace)).Run()
+		if err != nil {
+			return err
+		}
+	}
+
+	for _, campaign := range testCampaign {
+		absCampaign := filepath.Join(repoPath, campaign)
+		err := shellcmd.Command(fmt.Sprintf("kubectl apply -f %s -n %s", absCampaign, namespace)).Run()
+		if err != nil {
+			return err
+		}
+	}
+
+	// Wait 5 seconds to give the campaign time to be created before the activation is applied
+	time.Sleep(time.Second * 5)
+	for _, activation := range testActivations {
+		absActivation := filepath.Join(repoPath, activation)
+		err := shellcmd.Command(fmt.Sprintf("kubectl apply -f %s -n %s", absActivation, namespace)).Run()
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// CheckCatalogs polls the Catalog resources in namespace every 30 seconds and
+// returns once exactly seven exist, printing the names found on every poll.
+// An empty namespace is treated as "default".
+//
+// The count of seven is hard-coded, while testCatalogs applies only five Catalog
+// manifests; presumably the workflow creates the rest - TODO confirm.
+func CheckCatalogs(t *testing.T, namespace string) {
+	fmt.Printf("Checking Catalogs\n")
+	if namespace == "" {
+		namespace = "default"
+	}
+
+	cfg, err := testhelpers.RestConfig()
+	require.NoError(t, err)
+
+	dyn, err := dynamic.NewForConfig(cfg)
+	require.NoError(t, err)
+
+	for {
+		resources, err := dyn.Resource(schema.GroupVersionResource{
+			Group:    "federation.symphony",
+			Version:  "v1",
+			Resource: "catalogs",
+		}).Namespace(namespace).List(context.Background(), metav1.ListOptions{})
+		require.NoError(t, err)
+
+		catalogs := make([]string, 0, len(resources.Items))
+		for _, item := range resources.Items {
+			catalogs = append(catalogs, item.GetName())
+		}
+		fmt.Printf("Catalogs: %v\n", catalogs)
+		if len(resources.Items) == 7 {
+			break
+		}
+
+		// There is no deadline here; a count that never reaches seven is only
+		// ended by the go test timeout.
+		time.Sleep(30 * time.Second)
+	}
+}
+
+// CheckCampaign polls the Campaign resources in namespace every 30 seconds and
+// returns once exactly one exists. An empty namespace is treated as "default".
+//
+// The loop has no deadline of its own; if the campaign never shows up, only the
+// go test timeout ends the wait.
+func CheckCampaign(t *testing.T, namespace string) {
+	fmt.Printf("Checking Campaign\n")
+	if namespace == "" {
+		namespace = "default"
+	}
+
+	cfg, err := testhelpers.RestConfig()
+	require.NoError(t, err)
+
+	dyn, err := dynamic.NewForConfig(cfg)
+	require.NoError(t, err)
+
+	// Build the resource client once; only the List call is repeated.
+	campaigns := dyn.Resource(schema.GroupVersionResource{
+		Group:    "workflow.symphony",
+		Version:  "v1",
+		Resource: "campaigns",
+	}).Namespace(namespace)
+
+	for {
+		resources, err := campaigns.List(context.Background(), metav1.ListOptions{})
+		require.NoError(t, err)
+
+		if len(resources.Items) == 1 {
+			break
+		}
+
+		time.Sleep(30 * time.Second)
+	}
+}
+
+// CheckActivationStatus polls the single activation in namespace every 30 seconds
+// until its status is Done, then asserts the recorded history of the three stages
+// (wait, list, deploy). An empty namespace is treated as "default".
+// The loop has no deadline of its own; an activation that never reaches Done is
+// only ended by the go test timeout.
+func CheckActivationStatus(t *testing.T, namespace string) {
+	fmt.Printf("Checking Activation\n")
+	if namespace == "" {
+		namespace = "default"
+	}
+
+	cfg, err := testhelpers.RestConfig()
+	require.NoError(t, err)
+
+	dyn, err := dynamic.NewForConfig(cfg)
+	require.NoError(t, err)
+
+	for {
+		resources, err := dyn.Resource(schema.GroupVersionResource{
+			Group:    "workflow.symphony",
+			Version:  "v1",
+			Resource: "activations",
+		}).Namespace(namespace).List(context.Background(), metav1.ListOptions{})
+		require.NoError(t, err)
+
+		require.Len(t, resources.Items, 1, "there should be only one activation")
+
+		// Round-trip through JSON to decode the unstructured object into the typed model.
+		raw, err := json.Marshal(resources.Items[0].Object)
+		require.NoError(t, err)
+		var state model.ActivationState
+		err = json.Unmarshal(raw, &state)
+		require.NoError(t, err)
+		status := state.Status.Status
+		fmt.Printf("Current activation status: %s\n", status)
+		if status == v1alpha2.Done {
+			require.Equal(t, 3, len(state.Status.StageHistory))
+			require.Equal(t, "wait", state.Status.StageHistory[0].Stage)
+			require.Equal(t, "list", state.Status.StageHistory[0].NextStage)
+			require.Equal(t, v1alpha2.Done, state.Status.StageHistory[0].Status)
+			require.Equal(t, v1alpha2.Done.String(), state.Status.StageHistory[0].StatusMessage)
+			require.Equal(t, "catalogs", state.Status.StageHistory[0].Inputs["objectType"])
+			require.Equal(t, []interface{}{"sitecatalog:v1", "sitecatalog2:v1", "siteapp:v1", "sitek8starget:v1", "siteinstance:v1"}, state.Status.StageHistory[0].Inputs["names"].([]interface{}))
+			require.Equal(t, "catalogs", state.Status.StageHistory[0].Outputs["objectType"])
+			require.Equal(t, "list", state.Status.StageHistory[1].Stage)
+			require.Equal(t, "deploy", state.Status.StageHistory[1].NextStage)
+			require.Equal(t, v1alpha2.Done, state.Status.StageHistory[1].Status)
+			require.Equal(t, v1alpha2.Done.String(), state.Status.StageHistory[1].StatusMessage)
+			require.Equal(t, "catalogs", state.Status.StageHistory[1].Inputs["objectType"])
+			require.Equal(t, true, state.Status.StageHistory[1].Inputs["namesOnly"])
+			require.Equal(t, []interface{}{"siteapp-v-v1", "sitecatalog-v-v1", "sitecatalog2-v-v1", "siteinstance-v-v1", "sitek8starget-v-v1"}, state.Status.StageHistory[1].Outputs["items"].([]interface{}))
+			require.Equal(t, "catalogs", state.Status.StageHistory[1].Outputs["objectType"])
+			require.Equal(t, "deploy", state.Status.StageHistory[2].Stage)
+			require.Equal(t, "", state.Status.StageHistory[2].NextStage)
+			require.Equal(t, v1alpha2.Done, state.Status.StageHistory[2].Status)
+			require.Equal(t, v1alpha2.Done.String(), state.Status.StageHistory[2].StatusMessage)
+			require.Equal(t, []interface{}{"siteapp-v-v1", "sitecatalog-v-v1", "sitecatalog2-v-v1", "siteinstance-v-v1", "sitek8starget-v-v1"}, state.Status.StageHistory[2].Inputs["names"].([]interface{}))
+			break
+		}
+
+		time.Sleep(30 * time.Second)
+	}
+}
+
+// CheckTargetStatus polls the targets in namespace every 30 seconds until the
+// single expected target reports the provisioning status "Succeeded".
+// An empty namespace is treated as "default".
+//
+// The test fails at once if listing fails, if the namespace does not hold exactly
+// one target, or if the target reports "Failed". The loop has no deadline of its
+// own, so a target that never succeeds blocks until the go test timeout fires.
+func CheckTargetStatus(t *testing.T, namespace string) {
+	fmt.Printf("Checking Target\n")
+	if namespace == "" {
+		namespace = "default"
+	}
+
+	cfg, err := testhelpers.RestConfig()
+	require.NoError(t, err)
+
+	dyn, err := dynamic.NewForConfig(cfg)
+	require.NoError(t, err)
+
+	for {
+		resources, err := dyn.Resource(schema.GroupVersionResource{
+			Group:    "fabric.symphony",
+			Version:  "v1",
+			Resource: "targets",
+		}).Namespace(namespace).List(context.Background(), metav1.ListOptions{})
+		require.NoError(t, err)
+
+		require.Len(t, resources.Items, 1, "there should be only one target")
+
+		status := getStatus(resources.Items[0])
+		fmt.Printf("Current target status: %s\n", status)
+		require.NotEqual(t, "Failed", status, "target should not be in failed state")
+		if status == "Succeeded" {
+			break
+		}
+
+		// Not settled yet; poll again.
+		time.Sleep(30 * time.Second)
+	}
+}
+
+// CheckInstanceStatus waits, polling every 30 seconds, until the single instance
+// in namespace ("default" when empty) reports "Succeeded"; "Failed" fails the test.
+func CheckInstanceStatus(t *testing.T, namespace string) {
+	fmt.Printf("Checking Instances\n")
+	if namespace == "" {
+		namespace = "default"
+	}
+	restCfg, err := testhelpers.RestConfig()
+	require.NoError(t, err)
+
+	client, err := dynamic.NewForConfig(restCfg)
+	require.NoError(t, err)
+
+	instanceGVR := schema.GroupVersionResource{
+		Group:    "solution.symphony",
+		Version:  "v1",
+		Resource: "instances",
+	}
+	for {
+		list, err := client.Resource(instanceGVR).Namespace(namespace).List(context.Background(), metav1.ListOptions{})
+		require.NoError(t, err)
+		require.Len(t, list.Items, 1, "there should be only one instance")
+
+		current := getStatus(list.Items[0])
+		fmt.Printf("Current instance status: %s\n", current)
+		require.NotEqual(t, "Failed", current, "instance should not be in failed state")
+		if current == "Succeeded" {
+			return
+		}
+
+		time.Sleep(30 * time.Second)
+	}
+}
+
+// Verify that the pods we expect are running in the namespace ("default" when empty).
+// Lists the pods every 5 seconds until each expected string is contained in
+// the name of a pod in phase "Running".
+func VerifyPodsExist(t *testing.T, namespace string) {
+	fmt.Printf("Checking Pod Status\n")
+	kubeClient, err := testhelpers.KubeClient()
+	require.NoError(t, err)
+
+	if namespace == "" {
+		namespace = "default"
+	}
+	i := 0
+	for {
+		i++
+		// List all pods in the namespace
+		pods, err := kubeClient.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{})
+		require.NoError(t, err)
+
+		// "web-app" is the component name declared in the solution catalog manifest
+		toFind := []string{"web-app"}
+
+		notFound := make(map[string]bool)
+		for _, s := range toFind {
+			found := false
+			for _, pod := range pods.Items {
+				if strings.Contains(pod.Name, s) && pod.Status.Phase == "Running" {
+					found = true
+					break
+				}
+			}
+
+			if !found {
+				notFound[s] = true
+			}
+		}
+
+		if len(notFound) == 0 {
+			fmt.Println("All pods found!")
+			break
+		} else {
+			time.Sleep(time.Second * 5)
+
+			if i%12 == 0 { // every 12th poll, i.e. about once a minute
+				fmt.Printf("Waiting for pods: %v\n", notFound)
+			}
+		}
+	}
+}
+
+// getStatus extracts status.provisioningStatus.status from the resource.
+// It returns "" when a level of that path is absent or has another type.
+func getStatus(resource unstructured.Unstructured) string {
+	statusField, ok := resource.Object["status"].(map[string]interface{})
+	if !ok {
+		return ""
+	}
+	provisioning, ok := statusField["provisioningStatus"].(map[string]interface{})
+	if !ok {
+		return ""
+	}
+	// A failed assertion leaves value as "", which is the fallback result.
+	value, _ := provisioning["status"].(string)
+	return value
+}
diff --git a/test/localenv/magefile.go b/test/localenv/magefile.go
index 77ffe0beb..13d5fbd44 100644
--- a/test/localenv/magefile.go
+++ b/test/localenv/magefile.go
@@ -465,6 +465,15 @@ func buildAPI() error {
 	return shellcmd.Command(fmt.Sprintf("docker buildx build --platform %s -f ../../api/Dockerfile -t %s \"../..\" --load", platform, imageName)).Run() //oss
 }
 
+func (Build) ApiFault() error {
+	return buildAPIFault()
+}
+
+func buildAPIFault() error {
+	imageName := "ghcr.io/eclipse-symphony/symphony-api"
+	return shellcmd.Command(fmt.Sprintf("docker buildx build --platform %s -f ../../api/Dockerfile -t %s --build-arg FAULT_INJECTION_ENABLED=true \"../..\" --load", platform, imageName)).Run() //oss
+}
+
 func buildAgent() error {
 	pollAgentImageName := "ghcr.io/eclipse-symphony/symphony-poll-agent"
 	targetAgentImageName := "ghcr.io/eclipse-symphony/symphony-target-agent"
@@ -483,6 +492,15 @@ func buildK8s() error {
 	return shellcmd.Command(fmt.Sprintf("docker buildx build --platform %s -f ../../k8s/Dockerfile -t %s \"../..\" --load", platform, imageName)).Run() //oss
 }
 
+func (Build) K8sFault() error {
+	return buildK8sFault()
+}
+func buildK8sFault() error {
+	// Build the k8s image with the FAULT_INJECTION_ENABLED build arg set to true
+	imageName := "ghcr.io/eclipse-symphony/symphony-k8s"
+	return shellcmd.Command(fmt.Sprintf("docker buildx build --platform %s -f ../../k8s/Dockerfile -t %s  --build-arg FAULT_INJECTION_ENABLED=true \"../..\" --load", platform, imageName)).Run() //oss
+}
+
 /******************** Minikube ********************/
 
 // Installs the Minikube binary on your machine.

From 4303becb584e6e82ddf5e8b72fb1958b81f51234 Mon Sep 17 00:00:00 2001
From: Xingdong <xdlisjtu@gmail.com>
Date: Mon, 4 Nov 2024 11:07:18 +0800
Subject: [PATCH 2/9] README

---
 .../scenarios/faultTests/README.md            | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/test/integration/scenarios/faultTests/README.md b/test/integration/scenarios/faultTests/README.md
index e69de29bb..15eca9f2d 100644
--- a/test/integration/scenarios/faultTests/README.md
+++ b/test/integration/scenarios/faultTests/README.md
@@ -0,0 +1,60 @@
+<!--
+Copyright (c) Microsoft Corporation.
+Licensed under the MIT license.
+SPDX-License-Identifier: MIT
+-->
+# Overview
+
+This is a framework for symphony fault tests. 
+
+# How to add a new fault test
+1. Add failpoints in the code
+
+Add a comment line like example blow. Gofail package will translate it to the failpoint code when compiled with failpoint enabled.
+```
+// gofail: var beforeProviders string
+```
+
+2. Add a new fault test case
+
+There are already two fault tests - solution upgrade and workflow materialize in the faultTests folder. You can add a new case in the [constants.go](./constants.go) you want to use existing fault tests with new failpoint.
+
+For example, you can specify the test, the pod to inject failure, the failpoint name and fault types in the below structure. The most common fault type is `100.0%panic`. And you can also use other faults like sleep, error following [Gofail term](https://github.com/etcd-io/gofail/blob/master/doc/design.md#gofail-term)
+```json
+{
+    testCase:  TestCases["solutionUpdate"],
+    podLabel:  PodLabels["api"],
+    fault:     "onQueueError",
+    faultType: DefaultFaultType,
+}
+```
+
+You can also add new fault tests under faultTests if the existing ones don't hit the new failpoints.
+
+# Run tests
+
+## Local
+First, build the fault images and set up the cluster:
+```
+cd test/localenv
+mage build:apifault
+mage build:k8sfault
+mage cluster:up
+```
+Trigger fault test.
+```
+cd test/integration/scenarios/faultTests
+mage faulttests
+```
+
+## Github Action
+
+A fault-test [GitHub Action](../../../../.github/workflows/fault.yml) is available; you can trigger the workflow from the Actions page.
+
+# Diagnostic
+
+## Local
+All the test logs are collected under `/tmp/symphony-integration-test-logs/`
+
+## Github Action
+All the test logs are collected in the artifacts
\ No newline at end of file

From 033bcc916c68097aa04370bfae2cfeb3cbc2715d Mon Sep 17 00:00:00 2001
From: Xingdong <xdlisjtu@gmail.com>
Date: Fri, 8 Nov 2024 14:35:19 +0800
Subject: [PATCH 3/9] refactor and fix comments

---
 .github/workflows/fault.yml                   |   4 +-
 test/integration/lib/testhelpers/helpers.go   |  18 ---
 test/integration/lib/testhelpers/kubeutil.go  |  35 -----
 .../scenarios/faultTests/constants.go         | 124 ------------------
 .../scenarios/faultTests/magefile.go          |  19 +--
 .../solution/update/verify/manifest_test.go   |  18 +--
 .../materialize/verify/manifest_test.go       |   3 +-
 7 files changed, 23 insertions(+), 198 deletions(-)
 delete mode 100644 test/integration/scenarios/faultTests/constants.go

diff --git a/.github/workflows/fault.yml b/.github/workflows/fault.yml
index 4f74cdabf..011975332 100644
--- a/.github/workflows/fault.yml
+++ b/.github/workflows/fault.yml
@@ -22,10 +22,10 @@ jobs:
   build:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
 
     - name: Set up Go
-      uses: actions/setup-go@v3
+      uses: actions/setup-go@v5
       with:
         go-version: 1.22.4
         
diff --git a/test/integration/lib/testhelpers/helpers.go b/test/integration/lib/testhelpers/helpers.go
index 2744fefb3..35eba063d 100644
--- a/test/integration/lib/testhelpers/helpers.go
+++ b/test/integration/lib/testhelpers/helpers.go
@@ -9,7 +9,6 @@ import (
 	"time"
 
 	"github.com/eclipse-symphony/symphony/test/integration/lib/shell"
-	"github.com/princjef/mageutil/shellcmd"
 )
 
 func SetupClusterWithTunnel() (context.CancelFunc, int, error) {
@@ -125,20 +124,3 @@ func isProcessRunning(pid int) bool {
 	err = process.Signal(syscall.Signal(0))
 	return err == nil
 }
-
-func InjectPodFailure() error {
-	InjectCommand := os.Getenv("InjectCommand")
-	InjectPodLabel := os.Getenv("InjectPodLabel")
-	if InjectCommand == "" || InjectPodLabel == "" {
-		fmt.Println("InjectCommand is ", InjectCommand, "and InjectPodLabel is ", InjectPodLabel, ", skip error injection")
-		return nil
-	}
-
-	WaitFailpointServer(InjectPodLabel)
-	err := shellcmd.Command(InjectCommand).Run()
-	if err != nil {
-		fmt.Println("Failed to inject pod failure: " + err.Error())
-	}
-	fmt.Println("Injected fault")
-	return err
-}
diff --git a/test/integration/lib/testhelpers/kubeutil.go b/test/integration/lib/testhelpers/kubeutil.go
index 990849c09..ca827b497 100644
--- a/test/integration/lib/testhelpers/kubeutil.go
+++ b/test/integration/lib/testhelpers/kubeutil.go
@@ -184,38 +184,3 @@ func WaitPodOnline(podlabel string) error {
 	}
 	return fmt.Errorf("timeout waiting for pod to be ready")
 }
-
-func WaitFailpointServer(podlabel string) error {
-	clientset, err := KubeClient()
-	if err != nil {
-		return err
-	}
-	err = WaitPodOnline(podlabel)
-	if err != nil {
-		return err
-	}
-	pods := clientset.CoreV1().Pods("default")
-	for i := 0; i < 10; i++ {
-		podList, err := pods.List(context.Background(), metav1.ListOptions{
-			LabelSelector: podlabel,
-		})
-		if err != nil {
-			return err
-		}
-		if len(podList.Items) > 0 {
-			pod := podList.Items[0]
-			if pod.Status.Phase == corev1.PodRunning {
-				err = ShellExec(fmt.Sprintf("kubectl exec %s -- curl localhost:22381", pod.Name))
-				if err == nil {
-					return nil
-				} else {
-					fmt.Println("failed to connect to failpoint server, waiting...")
-				}
-			} else {
-				fmt.Println("pod not ready yet, waiting..." + pod.Status.Phase)
-			}
-		}
-		time.Sleep(time.Second * 10)
-	}
-	return fmt.Errorf("timeout waiting for pod to be ready")
-}
diff --git a/test/integration/scenarios/faultTests/constants.go b/test/integration/scenarios/faultTests/constants.go
deleted file mode 100644
index 74c253f27..000000000
--- a/test/integration/scenarios/faultTests/constants.go
+++ /dev/null
@@ -1,124 +0,0 @@
-//go:build mage
-
-/*
- * Copyright (c) Microsoft Corporation.
- * Licensed under the MIT license.
- * SPDX-License-Identifier: MIT
- */
-
-package main
-
-// Test config
-const (
-	TEST_TIMEOUT = "30m"
-)
-
-type FaultTestCase struct {
-	testCase  string
-	podLabel  string
-	fault     string
-	faultType string
-}
-
-var (
-	TestCases = map[string]string{
-		"solutionUpdate":      "./solution/update/verify/...",
-		"workflowMaterialize": "./workflow/materialize/verify/...",
-	}
-
-	PodLabels = map[string]string{
-		"api": "app=symphony-api",
-		"k8s": "control-plane=symphony-controller-manager",
-	}
-	Faults = []FaultTestCase{
-		{
-			testCase:  TestCases["solutionUpdate"],
-			podLabel:  PodLabels["api"],
-			fault:     "onQueueError",
-			faultType: DefaultFaultType,
-		},
-		{
-			testCase:  TestCases["solutionUpdate"],
-			podLabel:  PodLabels["api"],
-			fault:     "beforeProviders",
-			faultType: DefaultFaultType,
-		},
-		{
-			testCase:  TestCases["solutionUpdate"],
-			podLabel:  PodLabels["api"],
-			fault:     "beforeDeploymentError",
-			faultType: DefaultFaultType,
-		},
-		{
-			testCase:  TestCases["solutionUpdate"],
-			podLabel:  PodLabels["api"],
-			fault:     "afterDeploymentError",
-			faultType: DefaultFaultType,
-		},
-		{
-			testCase:  TestCases["solutionUpdate"],
-			podLabel:  PodLabels["api"],
-			fault:     "beforeConcludeSummary",
-			faultType: DefaultFaultType,
-		},
-		{
-			testCase:  TestCases["solutionUpdate"],
-			podLabel:  PodLabels["api"],
-			fault:     "beforeConcludeSummary",
-			faultType: DefaultFaultType,
-		},
-		{
-			testCase:  TestCases["solutionUpdate"],
-			podLabel:  PodLabels["k8s"],
-			fault:     "beforePollingResult",
-			faultType: DefaultFaultType,
-		},
-		{
-			testCase:  TestCases["solutionUpdate"],
-			podLabel:  PodLabels["k8s"],
-			fault:     "afterPollingResult",
-			faultType: DefaultFaultType,
-		},
-		{
-			testCase:  TestCases["solutionUpdate"],
-			podLabel:  PodLabels["k8s"],
-			fault:     "beforeQueueJob",
-			faultType: DefaultFaultType,
-		},
-		{
-			testCase:  TestCases["solutionUpdate"],
-			podLabel:  PodLabels["k8s"],
-			fault:     "afterQueueJob",
-			faultType: DefaultFaultType,
-		},
-		{
-			testCase:  TestCases["workflowMaterialize"],
-			podLabel:  PodLabels["api"],
-			fault:     "afterMaterializeOnce",
-			faultType: DefaultFaultType,
-		},
-		{
-			testCase:  TestCases["workflowMaterialize"],
-			podLabel:  PodLabels["api"],
-			fault:     "afterProvider",
-			faultType: DefaultFaultType,
-		},
-		// afterPublishTrigger fault test cannot pass now because of dedup issue in activation
-		// {
-		// 	testCase:  TestCases["workflowMaterialize"],
-		// 	podLabel:  PodLabels["api"],
-		// 	fault:     "afterPublishTrigger",
-		// 	faultType: DefaultFaultType,
-		// },
-		{
-			testCase:  TestCases["workflowMaterialize"],
-			podLabel:  PodLabels["api"],
-			fault:     "afterRunTrigger",
-			faultType: DefaultFaultType,
-		},
-	}
-
-	LocalPortForward = "22381"
-
-	DefaultFaultType = "100.0%panic"
-)
diff --git a/test/integration/scenarios/faultTests/magefile.go b/test/integration/scenarios/faultTests/magefile.go
index b65dc2521..153367f67 100644
--- a/test/integration/scenarios/faultTests/magefile.go
+++ b/test/integration/scenarios/faultTests/magefile.go
@@ -13,6 +13,7 @@ import (
 	"os"
 
 	"github.com/eclipse-symphony/symphony/test/integration/lib/testhelpers"
+	"github.com/eclipse-symphony/symphony/test/integration/scenarios/faultTests/utils"
 	"github.com/princjef/mageutil/shellcmd"
 )
 
@@ -20,7 +21,7 @@ func FaultTests() error {
 	fmt.Println("Running fault injection tests")
 
 	// Run fault injection tests
-	for _, test := range Faults {
+	for _, test := range utils.Faults {
 		err := FaultTestHelper(test)
 		if err != nil {
 			return err
@@ -29,8 +30,8 @@ func FaultTests() error {
 	return nil
 }
 
-func FaultTestHelper(test FaultTestCase) error {
-	testName := fmt.Sprintf("%s/%s/%s", test.testCase, test.fault, test.faultType)
+func FaultTestHelper(test utils.FaultTestCase) error {
+	testName := fmt.Sprintf("%s/%s/%s", test.TestCase, test.Fault, test.FaultType)
 	fmt.Println("Running ", testName)
 
 	// Step 2.1: setup cluster
@@ -42,16 +43,16 @@ func FaultTestHelper(test FaultTestCase) error {
 	// Step 2.2: enable port forward on specific pod
 	stopChan := make(chan struct{}, 1)
 	defer close(stopChan)
-	err = testhelpers.EnablePortForward(test.podLabel, LocalPortForward, stopChan)
+	err = testhelpers.EnablePortForward(test.PodLabel, utils.LocalPortForward, stopChan)
 	if err != nil {
 		return err
 	}
 
-	InjectCommand := fmt.Sprintf("curl localhost:%s/%s -XPUT -d'%s'", LocalPortForward, test.fault, test.faultType)
-	os.Setenv("InjectCommand", InjectCommand)
-	os.Setenv("InjectPodLabel", test.podLabel)
+	InjectCommand := fmt.Sprintf("curl localhost:%s/%s -XPUT -d'%s'", utils.LocalPortForward, test.Fault, test.FaultType)
+	os.Setenv(utils.InjectFaultEnvKey, InjectCommand)
+	os.Setenv(utils.PodEnvKey, test.PodLabel)
 
-	err = Verify(test.testCase)
+	err = Verify(test.TestCase)
 	return err
 }
 
@@ -61,7 +62,7 @@ func Verify(test string) error {
 	if err != nil {
 		return err
 	}
-	err = shellcmd.Command(fmt.Sprintf("go test -v -timeout %s %s", TEST_TIMEOUT, test)).Run()
+	err = shellcmd.Command(fmt.Sprintf("go test -v -timeout %s %s", utils.TEST_TIMEOUT, test)).Run()
 	if err != nil {
 		return err
 	}
diff --git a/test/integration/scenarios/faultTests/solution/update/verify/manifest_test.go b/test/integration/scenarios/faultTests/solution/update/verify/manifest_test.go
index 84fccc650..32322a6b8 100644
--- a/test/integration/scenarios/faultTests/solution/update/verify/manifest_test.go
+++ b/test/integration/scenarios/faultTests/solution/update/verify/manifest_test.go
@@ -16,6 +16,7 @@ import (
 	"time"
 
 	"github.com/eclipse-symphony/symphony/test/integration/lib/testhelpers"
+	"github.com/eclipse-symphony/symphony/test/integration/scenarios/faultTests/utils"
 	"github.com/princjef/mageutil/shellcmd"
 	"github.com/stretchr/testify/require"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -113,14 +114,13 @@ var (
 func TestScenario_Update_AllNamespaces(t *testing.T) {
 	namespace := "nondefault"
 	defer shellcmd.Command(fmt.Sprintf("rm -rf %s", testManifestsFolder)).Run()
-	if namespace != "default" {
-		// Create non-default namespace if not exist
-		err := shellcmd.Command(fmt.Sprintf("kubectl get namespace %s", namespace)).Run()
-		if err != nil {
-			// Better to check err message here but command only returns "exit status 1" for non-exisiting namespace
-			err = shellcmd.Command(fmt.Sprintf("kubectl create namespace %s", namespace)).Run()
-			require.NoError(t, err)
-		}
+
+	// Create non-default namespace if not exist
+	err := shellcmd.Command(fmt.Sprintf("kubectl get namespace %s", namespace)).Run()
+	if err != nil {
+		// Better to check err message here but command only returns "exit status 1" for non-existing namespace
+		err = shellcmd.Command(fmt.Sprintf("kubectl create namespace %s", namespace)).Run()
+		require.NoError(t, err)
 	}
 	Scenario_Update(t, namespace)
 }
@@ -142,7 +142,7 @@ func Scenario_Update(t *testing.T, namespace string) {
 	for _, test := range testCases {
 		fmt.Printf("[Test case]: %s\n", test.Name)
 
-		err := testhelpers.InjectPodFailure()
+		err := utils.InjectPodFailure()
 		require.NoError(t, err)
 
 		// Construct the manifests
diff --git a/test/integration/scenarios/faultTests/workflow/materialize/verify/manifest_test.go b/test/integration/scenarios/faultTests/workflow/materialize/verify/manifest_test.go
index c6a5c0e8e..427212ed1 100644
--- a/test/integration/scenarios/faultTests/workflow/materialize/verify/manifest_test.go
+++ b/test/integration/scenarios/faultTests/workflow/materialize/verify/manifest_test.go
@@ -18,6 +18,7 @@ import (
 	"github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/model"
 	"github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2"
 	"github.com/eclipse-symphony/symphony/test/integration/lib/testhelpers"
+	"github.com/eclipse-symphony/symphony/test/integration/scenarios/faultTests/utils"
 	"github.com/princjef/mageutil/shellcmd"
 	"github.com/stretchr/testify/require"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -53,7 +54,7 @@ var (
 
 func TestMaterializeWorkflow(t *testing.T) {
 	namespace := "nondefault"
-	err := testhelpers.InjectPodFailure()
+	err := utils.InjectPodFailure()
 	require.NoError(t, err)
 	DeployManifests(namespace)
 	CheckCatalogs(t, namespace)

From cbe395fdf72d6929df9376a540d35e4a00ef3712 Mon Sep 17 00:00:00 2001
From: Xingdong <xdlisjtu@gmail.com>
Date: Tue, 12 Nov 2024 19:39:13 +0800
Subject: [PATCH 4/9] fix comments

---
 api/Dockerfile                                |   2 +-
 .../v1alpha1/managers/stage/stage-manager.go  |   1 +
 .../stage/materialize/materialize.go          |   1 +
 api/pkg/apis/v1alpha1/vendors/stage-vendor.go |   1 +
 .../v1alpha2/providers/pubsub/redis/redis.go  |   1 +
 k8s/apis/fabric/v1/target_webhook.go          |   1 +
 .../fabric/target_polling_controller.go       |   1 +
 .../solution/instance_polling_controller.go   |   2 +
 k8s/reconcilers/deployment.go                 |   4 +
 .../scenarios/faultTests/README.md            |   1 +
 .../scenarios/faultTests/magefile.go          |   2 +-
 .../scenarios/faultTests/utils/constants.go   | 123 ++++++++++++++++++
 .../scenarios/faultTests/utils/helpers.go     |  89 +++++++++++++
 13 files changed, 227 insertions(+), 2 deletions(-)
 create mode 100644 test/integration/scenarios/faultTests/utils/constants.go
 create mode 100644 test/integration/scenarios/faultTests/utils/helpers.go

diff --git a/api/Dockerfile b/api/Dockerfile
index 677ca76b1..da13ac2ba 100644
--- a/api/Dockerfile
+++ b/api/Dockerfile
@@ -60,6 +60,6 @@ ADD ./api/symphony-api.json /
 EXPOSE 8080
 EXPOSE 8081
 ENV LOG_LEVEL=Debug
-ENV GOFAIL_HTTP "127.0.0.1:22381"
+ENV GOFAIL_HTTP="127.0.0.1:22381"
 # ENV CONFIG /symphony-api.json
 CMD sh -c 'if [ -f /etc/pki/ca-trust/source/anchors/proxy-cert.crt ]; then update-ca-trust; fi && exec /symphony-api -c $CONFIG -l $LOG_LEVEL'
\ No newline at end of file
diff --git a/api/pkg/apis/v1alpha1/managers/stage/stage-manager.go b/api/pkg/apis/v1alpha1/managers/stage/stage-manager.go
index 0ba93b44c..381089d74 100644
--- a/api/pkg/apis/v1alpha1/managers/stage/stage-manager.go
+++ b/api/pkg/apis/v1alpha1/managers/stage/stage-manager.go
@@ -631,6 +631,7 @@ func (s *StageManager) HandleTriggerEvent(ctx context.Context, campaign model.Ca
 
 		waitGroup.Wait()
 		close(results)
+		// DO NOT REMOVE THIS COMMENT
 		// gofail: var afterProvider string
 
 		outputs := make(map[string]interface{})
diff --git a/api/pkg/apis/v1alpha1/providers/stage/materialize/materialize.go b/api/pkg/apis/v1alpha1/providers/stage/materialize/materialize.go
index 16be7a9f2..55774f32e 100644
--- a/api/pkg/apis/v1alpha1/providers/stage/materialize/materialize.go
+++ b/api/pkg/apis/v1alpha1/providers/stage/materialize/materialize.go
@@ -553,6 +553,7 @@ func (i *MaterializeStageProvider) Process(ctx context.Context, mgrContext conte
 			}
 			createdObjectList[catalog.ObjectMeta.Name] = true
 		}
+		// DO NOT REMOVE THIS COMMENT
 		// gofail: var afterMaterializeOnce bool
 	}
 	if len(createdObjectList) < len(objects) {
diff --git a/api/pkg/apis/v1alpha1/vendors/stage-vendor.go b/api/pkg/apis/v1alpha1/vendors/stage-vendor.go
index 8af0f80fe..aa4624911 100644
--- a/api/pkg/apis/v1alpha1/vendors/stage-vendor.go
+++ b/api/pkg/apis/v1alpha1/vendors/stage-vendor.go
@@ -119,6 +119,7 @@ func (s *StageVendor) Init(config vendors.VendorConfig, factories []managers.IMa
 					Context: ctx,
 				})
 			}
+			// DO NOT REMOVE THIS COMMENT
 			// gofail: var afterPublishTrigger string
 			return nil
 		},
diff --git a/coa/pkg/apis/v1alpha2/providers/pubsub/redis/redis.go b/coa/pkg/apis/v1alpha2/providers/pubsub/redis/redis.go
index 358ed01ec..2bb7bfeca 100644
--- a/coa/pkg/apis/v1alpha2/providers/pubsub/redis/redis.go
+++ b/coa/pkg/apis/v1alpha2/providers/pubsub/redis/redis.go
@@ -218,6 +218,7 @@ func (i *RedisPubSubProvider) pollNewMessages(topic string, handler v1alpha2.Eve
 	}()
 
 	for {
+		// DO NOT REMOVE THIS COMMENT
 		// gofail: var PollNewMessagesLoop string
 		if i.Ctx.Err() != nil {
 			return
diff --git a/k8s/apis/fabric/v1/target_webhook.go b/k8s/apis/fabric/v1/target_webhook.go
index fd8b6e033..e7c23767c 100644
--- a/k8s/apis/fabric/v1/target_webhook.go
+++ b/k8s/apis/fabric/v1/target_webhook.go
@@ -133,6 +133,7 @@ func (r *Target) ValidateCreate() (admission.Warnings, error) {
 	operationName := fmt.Sprintf("%s/%s", constants.TargetOperationNamePrefix, constants.ActivityOperation_Write)
 	ctx := configutils.PopulateActivityAndDiagnosticsContextFromAnnotations(r.GetNamespace(), resourceK8SId, r.Annotations, operationName, myTargetClient, context.TODO(), targetlog)
 
+	// DO NOT REMOVE THIS COMMENT
 	// gofail: var validateError error
 
 	diagnostic.InfoWithCtx(targetlog, ctx, "validate create", "name", r.Name, "namespace", r.Namespace)
diff --git a/k8s/controllers/fabric/target_polling_controller.go b/k8s/controllers/fabric/target_polling_controller.go
index d737cd38d..30261c992 100644
--- a/k8s/controllers/fabric/target_polling_controller.go
+++ b/k8s/controllers/fabric/target_polling_controller.go
@@ -39,6 +39,7 @@ type TargetPollingReconciler struct {
 func (r *TargetPollingReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
 	log := ctrllog.FromContext(ctx)
 	diagnostic.InfoWithCtx(log, ctx, "Reconcile Polling Target", "Name", req.Name, "Namespace", req.Namespace)
+
 	// Initialize reconcileTime for latency metrics
 	reconcileTime := time.Now()
 
diff --git a/k8s/controllers/solution/instance_polling_controller.go b/k8s/controllers/solution/instance_polling_controller.go
index 434dde3cf..eebb8d527 100644
--- a/k8s/controllers/solution/instance_polling_controller.go
+++ b/k8s/controllers/solution/instance_polling_controller.go
@@ -41,6 +41,7 @@ func (r *InstancePollingReconciler) Reconcile(ctx context.Context, req ctrl.Requ
 	log := ctrllog.FromContext(ctx)
 	log.Info("Reconcile Polling Instance " + req.Name + " in namespace " + req.Namespace)
 
+	// DO NOT REMOVE THIS COMMENT
 	// gofail: var beforePollingResult string
 
 	// Initialize reconcileTime for latency metrics
@@ -87,6 +88,7 @@ func (r *InstancePollingReconciler) Reconcile(ctx context.Context, req ctrl.Requ
 		metrics.InstanceResourceType,
 		deploymentOperationType,
 	)
+	// DO NOT REMOVE THIS COMMENT
 	// gofail: var afterPollingResult string
 	return reconcileResult, err
 }
diff --git a/k8s/reconcilers/deployment.go b/k8s/reconcilers/deployment.go
index 45c041250..78f8a5009 100644
--- a/k8s/reconcilers/deployment.go
+++ b/k8s/reconcilers/deployment.go
@@ -147,6 +147,7 @@ func (r *DeploymentReconciler) populateDiagnosticsAndActivitiesFromAnnotations(c
 
 // attemptUpdate attempts to update the instance
 func (r *DeploymentReconciler) AttemptUpdate(ctx context.Context, object Reconcilable, isRemoval bool, log logr.Logger, operationStartTimeKey string, operationName string) (metrics.OperationStatus, reconcile.Result, error) {
+	// DO NOT REMOVE THIS COMMENT
 	// gofail: var delayAttemptUpdate string
 
 	// populate diagnostics and activities from annotations
@@ -196,11 +197,13 @@ func (r *DeploymentReconciler) AttemptUpdate(ctx context.Context, object Reconci
 		diagnostic.ErrorWithCtx(log, ctx, err, "failed to update jobid")
 		return metrics.StatusUpdateFailed, ctrl.Result{}, err
 	}
+	// DO NOT REMOVE THIS COMMENT
 	// gofail: var beforeQueueJob string
 	if err := r.queueDeploymentJob(ctx, object, isRemoval, operationStartTimeKey); err != nil {
 		diagnostic.ErrorWithCtx(log, ctx, err, "failed to queue deployment job")
 		return r.handleDeploymentError(ctx, object, nil, isRemoval, reconciliationInterval, err, log)
 	}
+	// DO NOT REMOVE THIS COMMENT
 	// gofail: var afterQueueJob string
 
 	diagnostic.InfoWithCtx(log, ctx, "Updating object status with deployment queued")
@@ -221,6 +224,7 @@ func (r *DeploymentReconciler) AttemptUpdate(ctx context.Context, object Reconci
 }
 
 func (r *DeploymentReconciler) PollingResult(ctx context.Context, object Reconcilable, isRemoval bool, log logr.Logger, operationStartTimeKey string, operationName string) (metrics.OperationStatus, reconcile.Result, error) {
+	// DO NOT REMOVE THIS COMMENT
 	// gofail: var delayBeforePolling string
 
 	// populate diagnostics and activities from annotations
diff --git a/test/integration/scenarios/faultTests/README.md b/test/integration/scenarios/faultTests/README.md
index 15eca9f2d..28e0cdb50 100644
--- a/test/integration/scenarios/faultTests/README.md
+++ b/test/integration/scenarios/faultTests/README.md
@@ -12,6 +12,7 @@ This is a framework for symphony fault tests.
 
 Add a comment line like example blow. Gofail package will translate it to the failpoint code when compiled with failpoint enabled.
 ```
+// DO NOT REMOVE THIS COMMENT
 // gofail: var beforeProviders string
 ```
 
diff --git a/test/integration/scenarios/faultTests/magefile.go b/test/integration/scenarios/faultTests/magefile.go
index 153367f67..fe1a98e41 100644
--- a/test/integration/scenarios/faultTests/magefile.go
+++ b/test/integration/scenarios/faultTests/magefile.go
@@ -17,6 +17,7 @@ import (
 	"github.com/princjef/mageutil/shellcmd"
 )
 
+// Entry point for running the tests
 func FaultTests() error {
 	fmt.Println("Running fault injection tests")
 
@@ -56,7 +57,6 @@ func FaultTestHelper(test utils.FaultTestCase) error {
 	return err
 }
 
-// Run tests for scenarios/update
 func Verify(test string) error {
 	err := shellcmd.Command("go clean -testcache").Run()
 	if err != nil {
diff --git a/test/integration/scenarios/faultTests/utils/constants.go b/test/integration/scenarios/faultTests/utils/constants.go
new file mode 100644
index 000000000..fba5d29e8
--- /dev/null
+++ b/test/integration/scenarios/faultTests/utils/constants.go
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) Microsoft Corporation.
+ * Licensed under the MIT license.
+ * SPDX-License-Identifier: MIT
+ */
+
+package utils
+
+const (
+	TEST_TIMEOUT      = "30m"
+	LocalPortForward  = "22381"
+	InjectFaultEnvKey = "InjectFaultCommand"
+	DeleteFaultEnvKey = "DeleteFaultCommand"
+	PodEnvKey         = "InjectPodLabel"
+)
+
+type FaultTestCase struct {
+	TestCase  string
+	PodLabel  string
+	Fault     string
+	FaultType string
+}
+
+var (
+	TestCases = map[string]string{
+		"solutionUpdate":      "./solution/update/verify/...",
+		"workflowMaterialize": "./workflow/materialize/verify/...",
+	}
+
+	PodLabels = map[string]string{
+		"api": "app=symphony-api",
+		"k8s": "control-plane=symphony-controller-manager",
+	}
+	Faults = []FaultTestCase{
+		{
+			TestCase:  TestCases["solutionUpdate"],
+			PodLabel:  PodLabels["api"],
+			Fault:     "onQueueError",
+			FaultType: DefaultFaultType,
+		},
+		{
+			TestCase:  TestCases["solutionUpdate"],
+			PodLabel:  PodLabels["api"],
+			Fault:     "beforeProviders",
+			FaultType: DefaultFaultType,
+		},
+		{
+			TestCase:  TestCases["solutionUpdate"],
+			PodLabel:  PodLabels["api"],
+			Fault:     "beforeDeploymentError",
+			FaultType: DefaultFaultType,
+		},
+		{
+			TestCase:  TestCases["solutionUpdate"],
+			PodLabel:  PodLabels["api"],
+			Fault:     "afterDeploymentError",
+			FaultType: DefaultFaultType,
+		},
+		{
+			TestCase:  TestCases["solutionUpdate"],
+			PodLabel:  PodLabels["api"],
+			Fault:     "beforeConcludeSummary",
+			FaultType: DefaultFaultType,
+		},
+		{
+			TestCase:  TestCases["solutionUpdate"],
+			PodLabel:  PodLabels["api"],
+			Fault:     "beforeConcludeSummary",
+			FaultType: DefaultFaultType,
+		},
+		{
+			TestCase:  TestCases["solutionUpdate"],
+			PodLabel:  PodLabels["k8s"],
+			Fault:     "beforePollingResult",
+			FaultType: DefaultFaultType,
+		},
+		{
+			TestCase:  TestCases["solutionUpdate"],
+			PodLabel:  PodLabels["k8s"],
+			Fault:     "afterPollingResult",
+			FaultType: DefaultFaultType,
+		},
+		{
+			TestCase:  TestCases["solutionUpdate"],
+			PodLabel:  PodLabels["k8s"],
+			Fault:     "beforeQueueJob",
+			FaultType: DefaultFaultType,
+		},
+		{
+			TestCase:  TestCases["solutionUpdate"],
+			PodLabel:  PodLabels["k8s"],
+			Fault:     "afterQueueJob",
+			FaultType: DefaultFaultType,
+		},
+		{
+			TestCase:  TestCases["workflowMaterialize"],
+			PodLabel:  PodLabels["api"],
+			Fault:     "afterMaterializeOnce",
+			FaultType: DefaultFaultType,
+		},
+		{
+			TestCase:  TestCases["workflowMaterialize"],
+			PodLabel:  PodLabels["api"],
+			Fault:     "afterProvider",
+			FaultType: DefaultFaultType,
+		},
+		// afterPublishTrigger fault test cannot pass now because of dedup issue in activation
+		// {
+		// 	TestCase:  TestCases["workflowMaterialize"],
+		// 	PodLabel:  PodLabels["api"],
+		// 	Fault:     "afterPublishTrigger",
+		// 	FaultType: DefaultFaultType,
+		// },
+		{
+			TestCase:  TestCases["workflowMaterialize"],
+			PodLabel:  PodLabels["api"],
+			Fault:     "afterRunTrigger",
+			FaultType: DefaultFaultType,
+		},
+	}
+
+	DefaultFaultType = "100.0%panic"
+)
diff --git a/test/integration/scenarios/faultTests/utils/helpers.go b/test/integration/scenarios/faultTests/utils/helpers.go
new file mode 100644
index 000000000..98656e244
--- /dev/null
+++ b/test/integration/scenarios/faultTests/utils/helpers.go
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) Microsoft Corporation.
+ * Licensed under the MIT license.
+ * SPDX-License-Identifier: MIT
+ */
+
+package utils
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	"github.com/eclipse-symphony/symphony/test/integration/lib/testhelpers"
+	"github.com/princjef/mageutil/shellcmd"
+)
+
+func WaitFailpointServer(podlabel string) error {
+	clientset, err := testhelpers.KubeClient()
+	if err != nil {
+		return err
+	}
+	err = testhelpers.WaitPodOnline(podlabel)
+	if err != nil {
+		return err
+	}
+	pods := clientset.CoreV1().Pods("default")
+	for i := 0; i < 10; i++ {
+		podList, err := pods.List(context.Background(), metav1.ListOptions{
+			LabelSelector: podlabel,
+		})
+		if err != nil {
+			return err
+		}
+		if len(podList.Items) > 0 {
+			pod := podList.Items[0]
+			if pod.Status.Phase == corev1.PodRunning {
+				err = testhelpers.ShellExec(fmt.Sprintf("kubectl exec %s -- curl localhost:22381", pod.Name))
+				if err == nil {
+					return nil
+				} else {
+					fmt.Println("failed to connect to failpoint server, waiting...")
+				}
+			} else {
+				fmt.Println("pod not ready yet, waiting..." + pod.Status.Phase)
+			}
+		}
+		time.Sleep(time.Second * 10)
+	}
+	return fmt.Errorf("timeout waiting for pod to be ready")
+}
+
+func InjectPodFailure() error {
+	InjectCommand := os.Getenv(InjectFaultEnvKey)
+	PodLabel := os.Getenv(PodEnvKey)
+	if InjectCommand == "" || PodLabel == "" {
+		fmt.Println("InjectCommand is ", InjectCommand, "and InjectPodLabel is ", PodLabel, ", skip error injection")
+		return nil
+	}
+
+	WaitFailpointServer(PodLabel)
+	err := shellcmd.Command(InjectCommand).Run()
+	if err != nil {
+		fmt.Println("Failed to inject pod failure: " + err.Error())
+	}
+	fmt.Println("Injected fault")
+	return err
+}
+
+func DeletePodFailure() error {
+	DeleteCommand := os.Getenv(DeleteFaultEnvKey)
+	PodLabel := os.Getenv(PodEnvKey)
+	if DeleteCommand == "" || PodLabel == "" {
+		fmt.Println("DeleteCommand is ", DeleteCommand, "and PodLabel is ", PodLabel, ", skip error injection")
+		return nil
+	}
+
+	WaitFailpointServer(PodLabel)
+	err := shellcmd.Command(DeleteCommand).Run()
+	if err != nil {
+		fmt.Println("Failed to delete pod failure: " + err.Error())
+	}
+	fmt.Println("Deleted fault")
+	return err
+}

From 416a599486a40a0ed6b8135e2304ceccea97ca6c Mon Sep 17 00:00:00 2001
From: Xingdong <xdlisjtu@gmail.com>
Date: Wed, 13 Nov 2024 21:01:43 +0800
Subject: [PATCH 5/9] fix comments

---
 k8s/Dockerfile                                |  3 ++-
 packages/go.work.sum                          | 26 +++++++++++++++++++
 .../scenarios/faultTests/README.md            |  2 +-
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/k8s/Dockerfile b/k8s/Dockerfile
index 407a541ee..86b6dcf6b 100644
--- a/k8s/Dockerfile
+++ b/k8s/Dockerfile
@@ -28,6 +28,7 @@ ARG FAULT_INJECTION_ENABLED=false
 # Install gcc, g++ and other necessary build tools
 RUN if echo "${BUILD_BASE_IMAGE}" | grep "alpine"; then \
         apk add --no-cache gcc musl-dev; \
+        apk update && apk add curl; \
     elif echo "${BUILD_BASE_IMAGE}" | grep "mariner"; then \
         tdnf install -y gcc glibc-devel && tdnf clean all; \
     else \
@@ -62,7 +63,7 @@ fi
 # Build
 RUN CGO_ENABLED=0 mage build
 FROM ${TARGET_BASE_IMAGE} AS manager
-RUN apk update && apk add curl
+
 WORKDIR /
 COPY --from=builder /k8s/bin/manager .
 USER 65532:65532
diff --git a/packages/go.work.sum b/packages/go.work.sum
index 30509f35e..f00f8b1ae 100644
--- a/packages/go.work.sum
+++ b/packages/go.work.sum
@@ -1,5 +1,6 @@
 bazil.org/fuse v0.0.0-20160811212531-371fbbdaa898/go.mod h1:Xbm+BRKSBEpa4q4hTSxohYNQpsxXPbPry4JJWOB3LB8=
 bazil.org/fuse v0.0.0-20200407214033-5883e5a4b512/go.mod h1:FbcW6z/2VytnFDhZfumh8Ss8zxHE6qpMP5sHTRe0EaM=
+cel.dev/expr v0.15.0/go.mod h1:TRSuuV7DlVCE/uwv5QbAiW/v8l5O8C4eEPHeu7gf7Sg=
 cloud.google.com/go v0.110.2/go.mod h1:k04UEeEtb6ZBRTv3dZz4CeJC3jKGxyhl0sAiVVquxiw=
 cloud.google.com/go v0.110.7/go.mod h1:+EYjdK8e5RME/VY/qLCAtuyALQ9q67dvuum8i+H5xsI=
 cloud.google.com/go/accessapproval v1.7.1/go.mod h1:JYczztsHRMK7NTXb6Xw+dwbs/WnOJxbo/2mTI+Kgg68=
@@ -31,6 +32,7 @@ cloud.google.com/go/compute v1.20.1/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdi
 cloud.google.com/go/compute v1.23.0/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM=
 cloud.google.com/go/compute v1.23.3/go.mod h1:VCgBUoMnIVIR0CscqQiPJLAG25E3ZRZMzcFZeQ+h8CI=
 cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA=
+cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k=
 cloud.google.com/go/contactcenterinsights v1.10.0/go.mod h1:bsg/R7zGLYMVxFFzfh9ooLTruLRCG9fnzhH9KznHhbM=
 cloud.google.com/go/container v1.24.0/go.mod h1:lTNExE2R7f+DLbAN+rJiKTisauFCaoDq6NURZ83eVH4=
 cloud.google.com/go/containeranalysis v0.10.1/go.mod h1:Ya2jiILITMY68ZLPaogjmOMNkwsDrWBSTyBubGXO7j0=
@@ -155,7 +157,9 @@ github.com/Masterminds/sprig v2.22.0+incompatible h1:z4yfnGrZ7netVz+0EDJ0Wi+5VZC
 github.com/Masterminds/sprig/v3 v3.2.1/go.mod h1:UoaO7Yp8KlPnJIYWTFkMaqPUYKTfGFPhxNuwnnxkKlk=
 github.com/Masterminds/vcs v1.13.3/go.mod h1:TiE7xuEjl1N4j016moRd6vezp6e6Lz23gypeXfzXeW8=
 github.com/Microsoft/cosesign1go v1.1.0/go.mod h1:o+sw7nhlGE6twhfjXQDWmBJO8zmfQXEmCcXEi3zha8I=
+github.com/Microsoft/cosesign1go v1.2.0/go.mod h1:1La/HcGw19rRLhPW0S6u55K6LKfti+GQSgGCtrfhVe8=
 github.com/Microsoft/didx509go v0.0.2/go.mod h1:F+msvNlKCEm3RgUE3kRpi7E+6hdR6r5PtOLWQKYfGbs=
+github.com/Microsoft/didx509go v0.0.3/go.mod h1:wWt+iQsLzn3011+VfESzznLIp/Owhuj7rLF7yLglYbk=
 github.com/Microsoft/go-winio v0.4.11/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA=
 github.com/Microsoft/go-winio v0.4.14/go.mod h1:qXqCSQ3Xa7+6tgxaGTIe4Kpcdsi+P8jBhyzoq1bpyYA=
 github.com/Microsoft/go-winio v0.4.15-0.20190919025122-fc70bd9a86b5/go.mod h1:tTuCMEN+UleMWgg9dVx4Hu52b1bJo+59jBh3ajtinzw=
@@ -176,8 +180,10 @@ github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdko
 github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ=
 github.com/a8m/expect v1.0.0/go.mod h1:4IwSCMumY49ScypDnjNbYEjgVeqy1/U2cEs3Lat96eA=
 github.com/agnivade/levenshtein v1.0.1/go.mod h1:CURSv5d9Uaml+FovSIICkLbAUZ9S4RqaHDIsdSBg7lM=
+github.com/agnivade/levenshtein v1.1.1/go.mod h1:veldBMzWxcCG2ZvUTKD2kJNRdCk5hVbJomOvKkmgYbo=
 github.com/akavel/rsrc v0.10.2/go.mod h1:uLoCtb9J+EyAqh+26kdrTgmzRBFPGOolLWKpdxkKq+c=
 github.com/alecthomas/kingpin/v2 v2.3.2/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE=
+github.com/alecthomas/kingpin/v2 v2.4.0/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE=
 github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE=
 github.com/alexflint/go-filemutex v0.0.0-20171022225611-72bdc8eae2ae/go.mod h1:CgnQgUtFrFz9mxFNtED3jI5tLDjKlOM+oUF/sTk6ps0=
 github.com/alexflint/go-filemutex v1.1.0/go.mod h1:7P4iRhttt/nUvUOrYIhcpMzv2G6CY9UnI16Z+UJqRyk=
@@ -230,6 +236,7 @@ github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWH
 github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
 github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
 github.com/cncf/xds/go v0.0.0-20231128003011-0fa0005c9caa/go.mod h1:x/1Gn8zydmfq8dk6e9PdstVsDgu9RuyIIJqAaF//0IM=
+github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
 github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8=
 github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5/go.mod h1:h6jFvWxBdQXxjopDMZyH2UVceIRfR84bdzbkoKrsWNo=
 github.com/cockroachdb/errors v1.2.4/go.mod h1:rQD95gz6FARkaKkQXUksEje/d9a6wBJoCr5oaCLELYA=
@@ -259,6 +266,7 @@ github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4g
 github.com/containerd/console v1.0.1/go.mod h1:XUsP6YE/mKtz6bxc+I8UiKKTP04qjQL4qcS3XoQ5xkw=
 github.com/containerd/console v1.0.2/go.mod h1:ytZPjGgY2oeTkAONYafi2kSj0aYggsf8acV1PGKCbzQ=
 github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
+github.com/containerd/console v1.0.4/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk=
 github.com/containerd/containerd/api v1.7.19/go.mod h1:fwGavl3LNwAV5ilJ0sbrABL44AQxmNjDRcwheXDb6Ig=
 github.com/containerd/continuity v0.0.0-20190426062206-aaeac12a7ffc/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y=
 github.com/containerd/continuity v0.0.0-20190815185530-f2a389ac0a02/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y=
@@ -403,6 +411,7 @@ github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.m
 github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0=
 github.com/envoyproxy/go-control-plane v0.11.1/go.mod h1:uhMcXKCQMEJHiAb0w+YGefQLaTEw+YhGluxZkrTmD0g=
 github.com/envoyproxy/go-control-plane v0.12.0/go.mod h1:ZBTaoJ23lqITozF0M6G4/IragXCQKCnYbmlmtHvwRG0=
+github.com/envoyproxy/go-control-plane v0.12.1-0.20240621013728-1eb8caab5155/go.mod h1:5Wkq+JduFtdAXihLmeTJf+tRYIT4KBc2vPXDhwVo1pA=
 github.com/envoyproxy/protoc-gen-validate v1.0.2/go.mod h1:GpiZQP3dDbg4JouG/NNS7QWXpgx6x8QiMKdmN72jogE=
 github.com/envoyproxy/protoc-gen-validate v1.0.4/go.mod h1:qys6tmnRsYrQqIhm2bvKZH4Blx/1gTIZ2UKVY1M+Yew=
 github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
@@ -433,6 +442,7 @@ github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2H
 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
 github.com/go-ini/ini v1.25.4/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8=
 github.com/go-ini/ini v1.66.6/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8=
+github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8=
 github.com/go-jose/go-jose/v3 v3.0.3/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ=
 github.com/go-kit/log v0.2.0/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0=
 github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0=
@@ -479,6 +489,7 @@ github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2V
 github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4=
 github.com/golang/glog v1.1.2/go.mod h1:zR+okUeTbrL6EL3xHUDxZuEtGv04p5shwip1+mL/rLQ=
 github.com/golang/glog v1.2.0/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w=
+github.com/golang/glog v1.2.1/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w=
 github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
 github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
 github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs=
@@ -498,6 +509,7 @@ github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8
 github.com/google/go-containerregistry v0.5.1/go.mod h1:Ct15B4yir3PLOP5jsy0GNeYVaIZs/MK/Jz5any1wFW0=
 github.com/google/go-containerregistry v0.14.0/go.mod h1:aiJ2fp/SXvkWgmYHioXnbMdlgB8eXiiYOY55gfN91Wk=
 github.com/google/go-containerregistry v0.17.0/go.mod h1:u0qB2l7mvtWVR5kNcbFIhFY1hLbf8eeGapA+vbFDCtQ=
+github.com/google/go-containerregistry v0.20.1/go.mod h1:YCMFNQeeXeLF+dnhhWkqDItx/JSkH01j1Kis4PsjzFI=
 github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/martian/v3 v3.3.2/go.mod h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk=
 github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
@@ -565,6 +577,7 @@ github.com/lestrrat-go/iter v1.0.1/go.mod h1:zIdgO1mRKhn8l9vrZJZz9TUMMFbQbLeTsbq
 github.com/lestrrat-go/iter v1.0.2/go.mod h1:Momfcq3AnRlRjI5b5O8/G5/BvpzrhoFTZcn06fEOPt4=
 github.com/lestrrat-go/jwx v1.2.25/go.mod h1:zoNuZymNl5lgdcu6P7K6ie2QRll5HVfF4xwxBBK1NxY=
 github.com/lestrrat-go/jwx v1.2.28/go.mod h1:nF+91HEMh/MYFVwKPl5HHsBGMPscqbQb+8IDQdIazP8=
+github.com/lestrrat-go/jwx v1.2.29/go.mod h1:hU8k2l6WF0ncx20uQdOmik/Gjg6E3/wIRtXSNFeZuB8=
 github.com/lestrrat-go/option v1.0.0/go.mod h1:5ZHFbivi4xwXxhxY9XHDe2FHo6/Z7WWmtT7T5nBBp3I=
 github.com/lestrrat-go/option v1.0.1/go.mod h1:5ZHFbivi4xwXxhxY9XHDe2FHo6/Z7WWmtT7T5nBBp3I=
 github.com/lib/pq v1.10.7/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
@@ -581,6 +594,7 @@ github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzp
 github.com/mattn/go-shellwords v1.0.3/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o=
 github.com/mattn/go-shellwords v1.0.6/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o=
 github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y=
+github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
 github.com/maxbrunsfeld/counterfeiter/v6 v6.2.2/go.mod h1:eD9eIE7cdwcMi9rYluz88Jz2VyhSmden33/aXg4oVIY=
 github.com/miekg/dns v1.1.43/go.mod h1:+evo5L0630/F6ca/Z9+GAqzhjGyn8/c+TBaOyfEl0V4=
 github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs=
@@ -592,6 +606,7 @@ github.com/mitchellh/cli v1.1.5/go.mod h1:v8+iFts2sPIKUV1ltktPXMCC8fumSKFItNcD2c
 github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
 github.com/mitchellh/osext v0.0.0-20151018003038-5e2d6d41470f/go.mod h1:OkQIRizQZAeMln+1tSwduZz7+Af5oFlKirV/MSYes2A=
 github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs=
+github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
 github.com/moby/sys/mountinfo v0.4.0/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A=
 github.com/moby/sys/mountinfo v0.4.1/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A=
 github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo=
@@ -600,6 +615,7 @@ github.com/moby/sys/signal v0.7.0/go.mod h1:GQ6ObYZfqacOwTtlXvcmh9A26dVRul/hbOZn
 github.com/moby/sys/symlink v0.1.0/go.mod h1:GGDODQmbFOjFsXvfLVn3+ZRxkch54RkSiGqsZeMYowQ=
 github.com/moby/sys/symlink v0.2.0/go.mod h1:7uZVF2dqJjG/NsClqul95CqKOBRQyYSNnJ6BMgR/gFs=
 github.com/moby/sys/user v0.1.0/go.mod h1:fKJhFOnsCN6xZ5gSfbM6zaHGgDJMrqt9/reuj4T7MmU=
+github.com/moby/sys/user v0.3.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs=
 github.com/moby/term v0.0.0-20200312100748-672ec06f55cd/go.mod h1:DdlQx2hp0Ss5/fLikoLlEeIYiATotOjgB//nb973jeo=
 github.com/moby/term v0.0.0-20210610120745-9d4ed1856297/go.mod h1:vgPCkQMyxTZ7IDy8SXRufE172gr8+K/JE/7hHFxHW3A=
 github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
@@ -644,6 +660,7 @@ github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9
 github.com/onsi/gomega v1.27.10/go.mod h1:RsS8tutOdbdgzbPtzzATp12yT7kM5I5aElG3evPbQ0M=
 github.com/onsi/gomega v1.29.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ=
 github.com/open-policy-agent/opa v0.42.2/go.mod h1:MrmoTi/BsKWT58kXlVayBb+rYVeaMwuBm3nYAN3923s=
+github.com/open-policy-agent/opa v0.68.0/go.mod h1:5E5SvaPwTpwt2WM177I9Z3eT7qUpmOGjk1ZdHs+TZ4w=
 github.com/opencontainers/go-digest v0.0.0-20170106003457-a6d0ee40d420/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s=
 github.com/opencontainers/go-digest v0.0.0-20180430190053-c9281466c8b2/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s=
 github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s=
@@ -656,6 +673,7 @@ github.com/opencontainers/image-spec v1.1.0-rc2.0.20221005185240-3a7f492d3f1b/go
 github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8=
 github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss=
 github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8=
+github.com/opencontainers/runc v1.1.14/go.mod h1:E4C2z+7BxR7GHXp0hAY53mek+x49X1LjPNeMTfRGvOA=
 github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
 github.com/opencontainers/runtime-spec v1.0.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
 github.com/opencontainers/runtime-spec v1.0.2-0.20190207185410-29686dbc5559/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
@@ -664,6 +682,7 @@ github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d/go.m
 github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
 github.com/opencontainers/runtime-spec v1.1.0-rc.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
 github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
+github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
 github.com/opencontainers/runtime-tools v0.0.0-20181011054405-1d69bd0f9c39/go.mod h1:r3f7wjNzSs2extwzU3Y+6pKfobzPh+kKFJ3ofN+3nfs=
 github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI=
 github.com/opencontainers/selinux v1.6.0/go.mod h1:VVGKuOLlE7v4PJyT6h7mNWvq1rzqiriPsEqVhc+svHE=
@@ -678,6 +697,7 @@ github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCko
 github.com/peterh/liner v0.0.0-20170211195444-bf27d3ba8e1d/go.mod h1:xIteQHvHuaLYG9IFj6mSxM0fCKrs34IrEQUhOYuGPHc=
 github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
 github.com/pkg/errors v0.8.1-0.20171018195549-f15c970de5b7/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
 github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=
 github.com/poy/onpar v0.0.0-20200406201722-06f95a1c68e8/go.mod h1:nSbFQvMj97ZyhFRSJYtut+msi4sOY6zJDGCdSc+/rZU=
 github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021/go.mod h1:prYjPmNq4d1NPVmpShWobRqXY3q7Vp+80DqgxxUrUIA=
@@ -772,14 +792,17 @@ github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijb
 github.com/urfave/cli v1.22.4/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
 github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8=
 github.com/urfave/cli v1.22.14/go.mod h1:X0eDS6pD6Exaclxm99NJ3FiCDRED7vIHpx2mDOHLvkA=
+github.com/urfave/cli v1.22.15/go.mod h1:wSan1hmo5zeyLGBjRJbzRTNk8gwoYa2B9n4q9dmRIc0=
 github.com/vbatts/tar-split v0.11.2/go.mod h1:vV3ZuO2yWSVsz+pfFzDG/upWH1JhjOiEaWq6kXyQ3VI=
 github.com/vbatts/tar-split v0.11.3/go.mod h1:9QlHN18E+fEH7RdG+QAJJcuya3rqT7eXSTY7wGrAokY=
 github.com/vektah/gqlparser/v2 v2.4.5/go.mod h1:flJWIR04IMQPGz+BXLrORkrARBxv/rtyIAFvd/MceW0=
 github.com/veraison/go-cose v1.0.0-rc.1/go.mod h1:7ziE85vSq4ScFTg6wyoMXjucIGOf4JkFEZi/an96Ct4=
+github.com/veraison/go-cose v1.1.0/go.mod h1:7ziE85vSq4ScFTg6wyoMXjucIGOf4JkFEZi/an96Ct4=
 github.com/veraison/go-cose v1.2.0/go.mod h1:7ziE85vSq4ScFTg6wyoMXjucIGOf4JkFEZi/an96Ct4=
 github.com/vishvananda/netlink v0.0.0-20181108222139-023a6dafdcdf/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk=
 github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
 github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
+github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
 github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI=
 github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
 github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
@@ -792,6 +815,7 @@ github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtX
 github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
 github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
 github.com/yashtewari/glob-intersection v0.1.0/go.mod h1:LK7pIC3piUjovexikBbJ26Yml7g8xa5bsjfx2v1fwok=
+github.com/yashtewari/glob-intersection v0.2.0/go.mod h1:LK7pIC3piUjovexikBbJ26Yml7g8xa5bsjfx2v1fwok=
 github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
 github.com/yvasiyarov/go-metrics v0.0.0-20140926110328-57bccd1ccd43/go.mod h1:aX5oPXxHm3bOH+xeAttToC8pqch2ScQN/JoXYupl6xs=
 github.com/yvasiyarov/gorelic v0.0.0-20141212073537-a9bba5b9ab50/go.mod h1:NUSPSUX/bi6SeDMUh6brw0nXpxHnc96TguQh0+r/ssA=
@@ -1031,6 +1055,7 @@ golang.org/x/tools v0.10.0/go.mod h1:UJwyiVBsOA2uwvK/e5OY3GTpDUJriEd+/YlqAwLPmyM
 golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=
 google.golang.org/api v0.0.0-20160322025152-9bf6e6e569ff/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
 google.golang.org/api v0.126.0/go.mod h1:mBwVAtz+87bEN6CbA1GtZPDOqY2R5ONPqJeIlvyo4Aw=
+google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
 google.golang.org/cloud v0.0.0-20151119220103-975617b05ea8/go.mod h1:0H1ncTHf11KCFhTc/+EFRbzSCOZx+VUbRMk55Yv5MYk=
 google.golang.org/genproto v0.0.0-20190522204451-c2c4e71fbf69/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s=
 google.golang.org/genproto v0.0.0-20200117163144-32f20d992d24/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
@@ -1073,6 +1098,7 @@ google.golang.org/grpc v1.57.0/go.mod h1:Sd+9RMTACXwmub0zcNY2c4arhtrbBYD1AUHI/dt
 google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0=
 google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw=
 google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0/go.mod h1:Dk1tviKTvMCz5tvh7t+fh94dhmQVHuCt2OzJB3CTW9Y=
+google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.5.1/go.mod h1:5KF+wpkbTSbGcR9zteSqZV6fqFOWBl4Yde8En8MryZA=
 google.golang.org/protobuf v1.29.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
 google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
 gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U=
diff --git a/test/integration/scenarios/faultTests/README.md b/test/integration/scenarios/faultTests/README.md
index 28e0cdb50..01119039b 100644
--- a/test/integration/scenarios/faultTests/README.md
+++ b/test/integration/scenarios/faultTests/README.md
@@ -18,7 +18,7 @@ Add a comment line like example blow. Gofail package will translate it to the fa
 
 2. Add a new fault test case
 
-There are already two fault tests - solution upgrade and workflow materialize in the faultTests folder. You can add a new case in the [constants.go](./constants.go) you want to use existing fault tests with new failpoint.
+There are already two fault tests - solution upgrade and workflow materialize - in the faultTests folder. You can add a new case in [constants.go](./utils/constants.go) if you want to use the existing fault tests with a new failpoint.
 
 For example, you can specify the test, the pod to inject failure, the failpoint name and fault types in the below structure. The most common fault type is `100.0%panic`. And you can also use other faults like sleep, error following [Gofail term](https://github.com/etcd-io/gofail/blob/master/doc/design.md#gofail-term)
 ```json

From 0ebe01a276fc16b4001d1dc73d8da8f67035d676 Mon Sep 17 00:00:00 2001
From: Xingdong <xdlisjtu@gmail.com>
Date: Sun, 17 Nov 2024 20:46:36 +0800
Subject: [PATCH 6/9] Revert job event retry and disable solution fault tests

---
 api/pkg/apis/v1alpha1/vendors/job-vendor.go   |  3 +-
 .../scenarios/faultTests/utils/constants.go   | 74 ++++++++++---------
 2 files changed, 40 insertions(+), 37 deletions(-)

diff --git a/api/pkg/apis/v1alpha1/vendors/job-vendor.go b/api/pkg/apis/v1alpha1/vendors/job-vendor.go
index 56cded851..06bf9048d 100644
--- a/api/pkg/apis/v1alpha1/vendors/job-vendor.go
+++ b/api/pkg/apis/v1alpha1/vendors/job-vendor.go
@@ -74,7 +74,8 @@ func (e *JobVendor) Init(config vendors.VendorConfig, factories []managers.IMana
 			if err != nil && v1alpha2.IsDelayed(err) {
 				go e.Vendor.Context.Publish(topic, event)
 			}
-			return err
+			// job reconciler already has a retry mechanism, return nil to avoid retrying
+			return nil
 		},
 	})
 	e.Vendor.Context.Subscribe("heartbeat", v1alpha2.EventHandler{
diff --git a/test/integration/scenarios/faultTests/utils/constants.go b/test/integration/scenarios/faultTests/utils/constants.go
index fba5d29e8..24a1eb8c9 100644
--- a/test/integration/scenarios/faultTests/utils/constants.go
+++ b/test/integration/scenarios/faultTests/utils/constants.go
@@ -32,42 +32,44 @@ var (
 		"k8s": "control-plane=symphony-controller-manager",
 	}
 	Faults = []FaultTestCase{
-		{
-			TestCase:  TestCases["solutionUpdate"],
-			PodLabel:  PodLabels["api"],
-			Fault:     "onQueueError",
-			FaultType: DefaultFaultType,
-		},
-		{
-			TestCase:  TestCases["solutionUpdate"],
-			PodLabel:  PodLabels["api"],
-			Fault:     "beforeProviders",
-			FaultType: DefaultFaultType,
-		},
-		{
-			TestCase:  TestCases["solutionUpdate"],
-			PodLabel:  PodLabels["api"],
-			Fault:     "beforeDeploymentError",
-			FaultType: DefaultFaultType,
-		},
-		{
-			TestCase:  TestCases["solutionUpdate"],
-			PodLabel:  PodLabels["api"],
-			Fault:     "afterDeploymentError",
-			FaultType: DefaultFaultType,
-		},
-		{
-			TestCase:  TestCases["solutionUpdate"],
-			PodLabel:  PodLabels["api"],
-			Fault:     "beforeConcludeSummary",
-			FaultType: DefaultFaultType,
-		},
-		{
-			TestCase:  TestCases["solutionUpdate"],
-			PodLabel:  PodLabels["api"],
-			Fault:     "beforeConcludeSummary",
-			FaultType: DefaultFaultType,
-		},
+		// Symphony API fault tests are disabled since job events cannot be retried now.
+		// Tracked in issue 558
+		// {
+		// 	TestCase:  TestCases["solutionUpdate"],
+		// 	PodLabel:  PodLabels["api"],
+		// 	Fault:     "onQueueError",
+		// 	FaultType: DefaultFaultType,
+		// },
+		// {
+		// 	TestCase:  TestCases["solutionUpdate"],
+		// 	PodLabel:  PodLabels["api"],
+		// 	Fault:     "beforeProviders",
+		// 	FaultType: DefaultFaultType,
+		// },
+		// {
+		// 	TestCase:  TestCases["solutionUpdate"],
+		// 	PodLabel:  PodLabels["api"],
+		// 	Fault:     "beforeDeploymentError",
+		// 	FaultType: DefaultFaultType,
+		// },
+		// {
+		// 	TestCase:  TestCases["solutionUpdate"],
+		// 	PodLabel:  PodLabels["api"],
+		// 	Fault:     "afterDeploymentError",
+		// 	FaultType: DefaultFaultType,
+		// },
+		// {
+		// 	TestCase:  TestCases["solutionUpdate"],
+		// 	PodLabel:  PodLabels["api"],
+		// 	Fault:     "beforeConcludeSummary",
+		// 	FaultType: DefaultFaultType,
+		// },
+		// {
+		// 	TestCase:  TestCases["solutionUpdate"],
+		// 	PodLabel:  PodLabels["api"],
+		// 	Fault:     "beforeConcludeSummary",
+		// 	FaultType: DefaultFaultType,
+		// },
 		{
 			TestCase:  TestCases["solutionUpdate"],
 			PodLabel:  PodLabels["k8s"],

From efd38ea9aff506c6e0b737ae6a38d63fb05460c5 Mon Sep 17 00:00:00 2001
From: Xingdong <xdlisjtu@gmail.com>
Date: Sun, 17 Nov 2024 23:15:38 +0800
Subject: [PATCH 7/9] dockerfile fix

---
 k8s/Dockerfile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/k8s/Dockerfile b/k8s/Dockerfile
index 86b6dcf6b..00f329395 100644
--- a/k8s/Dockerfile
+++ b/k8s/Dockerfile
@@ -27,8 +27,7 @@ ARG FAULT_INJECTION_ENABLED=false
 
 # Install gcc, g++ and other necessary build tools
 RUN if echo "${BUILD_BASE_IMAGE}" | grep "alpine"; then \
-        apk add --no-cache gcc musl-dev; \
-        apk update && apk add curl; \
+        apk add --no-cache gcc musl-dev curl; \
     elif echo "${BUILD_BASE_IMAGE}" | grep "mariner"; then \
         tdnf install -y gcc glibc-devel && tdnf clean all; \
     else \

From 9689dee73978422ff97abcf4c7831935f1bb4161 Mon Sep 17 00:00:00 2001
From: Xingdong <xdlisjtu@gmail.com>
Date: Wed, 27 Nov 2024 21:59:15 +0800
Subject: [PATCH 8/9] check campaign before creating activation

---
 .../workflow/materialize/verify/manifest_test.go      | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/test/integration/scenarios/faultTests/workflow/materialize/verify/manifest_test.go b/test/integration/scenarios/faultTests/workflow/materialize/verify/manifest_test.go
index 427212ed1..6d5a168a4 100644
--- a/test/integration/scenarios/faultTests/workflow/materialize/verify/manifest_test.go
+++ b/test/integration/scenarios/faultTests/workflow/materialize/verify/manifest_test.go
@@ -56,16 +56,15 @@ func TestMaterializeWorkflow(t *testing.T) {
 	namespace := "nondefault"
 	err := utils.InjectPodFailure()
 	require.NoError(t, err)
-	DeployManifests(namespace)
+	DeployManifests(t, namespace)
 	CheckCatalogs(t, namespace)
-	CheckCampaign(t, namespace)
 	CheckActivationStatus(t, namespace)
 	CheckTargetStatus(t, namespace)
 	CheckInstanceStatus(t, namespace)
 	VerifyPodsExist(t, namespace)
 }
 
-func DeployManifests(namespace string) error {
+func DeployManifests(t *testing.T, namespace string) error {
 	repoPath := "../"
 	if namespace != "default" {
 		// Create non-default namespace if not exist
@@ -95,8 +94,8 @@ func DeployManifests(namespace string) error {
 		}
 	}
 
-	// wait for 5 seconds to make sure campaign is created
-	time.Sleep(time.Second * 5)
+	CheckCampaign(t, namespace)
+
 	for _, activation := range testActivations {
 		absActivation := filepath.Join(repoPath, activation)
 		err := shellcmd.Command(fmt.Sprintf("kubectl apply -f %s -n %s", absActivation, namespace)).Run()
@@ -149,7 +148,7 @@ func CheckCatalogs(t *testing.T, namespace string) {
 	}
 }
 
-// Verify catalog created
+// Verify campaign created
 func CheckCampaign(t *testing.T, namespace string) {
 	fmt.Printf("Checking Campaign\n")
 	if namespace == "" {

From 389dec936d36903da790ec84069c18d7df4c799a Mon Sep 17 00:00:00 2001
From: Xingdong <xdlisjtu@gmail.com>
Date: Thu, 28 Nov 2024 16:04:23 +0800
Subject: [PATCH 9/9] fix comments

---
 .../faultTests/solution/update/verify/manifest_test.go    | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/test/integration/scenarios/faultTests/solution/update/verify/manifest_test.go b/test/integration/scenarios/faultTests/solution/update/verify/manifest_test.go
index 32322a6b8..cfe31498f 100644
--- a/test/integration/scenarios/faultTests/solution/update/verify/manifest_test.go
+++ b/test/integration/scenarios/faultTests/solution/update/verify/manifest_test.go
@@ -256,9 +256,7 @@ func verifyPodsExist(t *testing.T, test TestCase, toFind []string) {
 	kubeClient, err := testhelpers.KubeClient()
 	require.NoError(t, err)
 
-	i := 0
-	for {
-		i++
+	for i := 0; ; i++ {
 		// List all pods in the namespace
 		pods, err := kubeClient.CoreV1().Pods("test-scope").List(context.Background(), metav1.ListOptions{})
 		require.NoError(t, err)
@@ -300,9 +298,7 @@ func verifyPodsDeleted(t *testing.T, test TestCase, toFind []string) {
 	kubeClient, err := testhelpers.KubeClient()
 	require.NoError(t, err)
 
-	i := 0
-	for {
-		i++
+	for i := 0; ; i++ {
 		// List all pods in the namespace
 		pods, err := kubeClient.CoreV1().Pods("test-scope").List(context.Background(), metav1.ListOptions{})
 		require.NoError(t, err)