Skip to content

Commit

Permalink
Automate Fault tests (#544)
Browse files Browse the repository at this point in the history
* Fault tests

* README

* refactor and fix comments

* fix comments

* fix comments

* Revert job event retry and disable solution fault tests

* dockerfile fix

* check campaign before creating activation

* fix comments

---------

Co-authored-by: Jiawei Du <[email protected]>
  • Loading branch information
RemindD and msftcoderdjw authored Dec 2, 2024
1 parent 5ea7d2a commit 11fd857
Show file tree
Hide file tree
Showing 39 changed files with 1,676 additions and 10 deletions.
111 changes: 111 additions & 0 deletions .github/workflows/fault.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# This workflow builds the fault-injection images (mage build:apifault /
# build:k8sfault), brings up a local minikube-backed cluster, and runs the
# fault tests under test/integration/scenarios/faultTests.
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go

name: fault

on:
  push:
    branches:
      - main
      - 'release/**'
  pull_request:
    branches:
      - main
      - 'release/**'
  workflow_dispatch:
env:
  ContainerRegistry: "ghcr.io"
  ContainerRegistryRepo: "ghcr.io/eclipse-symphony"

jobs:

  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: 1.22.4

      - name: Set up custom GOPATH
        run: |
          mkdir -p /home/runner/go
          # Every step runs in its own non-interactive shell, so exports
          # appended to ~/.bashrc are never seen by later steps. Publish the
          # values through the runner's environment files instead.
          echo "GOPATH=/home/runner/go" >> "$GITHUB_ENV"
          echo "/home/runner/go/bin" >> "$GITHUB_PATH"
      - name: Install make
        run: sudo apt-get update && sudo apt-get install -y build-essential

      - name: Check docker version and images
        run: docker --version && docker images

      - name: Install kubectl
        run: |
          curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          chmod +x kubectl
          sudo mv ./kubectl /usr/local/bin/kubectl
          kubectl version --client
          kubectl config view
      - name: Install Helm
        run: |
          curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
          chmod 700 get_helm.sh
          ./get_helm.sh
      - name: Install minikube
        run: |
          curl -Lo minikube https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
          chmod +x minikube
          sudo mv minikube /usr/local/bin/
          minikube start
          kubectl config view
      - name: Install Mage
        run: |
          cd ..
          git clone https://github.com/magefile/mage
          cd mage
          go run bootstrap.go
          cd ..
      # The registry is taken from env.ContainerRegistry (ghcr.io), not Docker Hub.
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.ContainerRegistry }}
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build docker images
        run: |
          cd test/localenv/
          mage build:apifault
          mage build:k8sfault
          mage cluster:up
      - name: Go work init
        run: |
          mv go.work.bk go.work
      - name: Run fault tests
        run: |
          cd test/integration/scenarios/faultTests/ && mage faulttests
      # Upload logs even when an earlier step failed; a failed upload must not
      # fail the job.
      - name: Collect and upload symphony logs
        uses: actions/upload-artifact@v4
        with:
          name: symphony-logs
          path: |
            /tmp/symphony-integration-test-logs/**/*.log
        continue-on-error: true
        if: always()







12 changes: 11 additions & 1 deletion api/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ ARG TARGETPLATFORM
ARG BUILDPLATFORM
ARG TARGETOS
ARG TARGETARCH
ARG FAULT_INJECTION_ENABLED=false

ENV CGO_ENABLED=0

Expand All @@ -23,6 +24,14 @@ COPY ./api /workspace/api
WORKDIR /workspace/api
# File permissions are not preserved when copying files in ADO.
RUN chmod +x pkg/apis/v1alpha1/providers/target/script/mock-*.sh

# Install gofail and run `gofail enable` on every directory under the api and
# coa trees, but only when the image is built with FAULT_INJECTION_ENABLED=true.
# The comparison uses the POSIX "=" operator: "==" is a shell extension that a
# strict /bin/sh (e.g. dash) rejects, which would make the condition fail and
# silently skip this whole step.
RUN if [ "$FAULT_INJECTION_ENABLED" = "true" ]; then \
        go install go.etcd.io/gofail@latest && \
        find /workspace/api -type d | while read -r dir; do gofail enable "$dir"; done && \
        find /workspace/coa -type d | while read -r dir; do gofail enable "$dir"; done && \
        cd /workspace/api && go get go.etcd.io/gofail/runtime; \
    fi
RUN CGO_ENABLED=${CGO_ENABLED} GOOS=${TARGETOS} GOARCH=${TARGETARCH} GODEBUG=netdns=cgo go build -o /dist/symphony-api

FROM ${TARGET_BASE_IMAGE}
Expand Down Expand Up @@ -51,5 +60,6 @@ ADD ./api/symphony-api.json /
EXPOSE 8080
EXPOSE 8081
ENV LOG_LEVEL=Debug
ENV GOFAIL_HTTP="127.0.0.1:22381"
# ENV CONFIG /symphony-api.json
CMD sh -c 'if [ -f /etc/pki/ca-trust/source/anchors/proxy-cert.crt ]; then update-ca-trust; fi && exec /symphony-api -c $CONFIG -l $LOG_LEVEL'
CMD sh -c 'if [ -f /etc/pki/ca-trust/source/anchors/proxy-cert.crt ]; then update-ca-trust; fi && exec /symphony-api -c $CONFIG -l $LOG_LEVEL'
22 changes: 18 additions & 4 deletions api/pkg/apis/v1alpha1/managers/solution/solution-manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"errors"
"fmt"
"os"
"runtime/debug"
"strconv"
"strings"
"time"
Expand Down Expand Up @@ -330,11 +331,16 @@ func (s *SolutionManager) Reconcile(ctx context.Context, deployment model.Deploy
return summary, err
}
// On normal return, conclude the summary for this deployment. If the
// surrounding function is panicking, log the panic value together with the
// stack trace and re-panic without concluding the summary.
defer func() {
	if r := recover(); r != nil {
		// debug.Stack returns a []byte; formatting it with %v would log a
		// list of byte values, so use %s to get a readable trace. The
		// recovered value is logged as well so the cause is not lost.
		log.ErrorfCtx(ctx, " M (Solution): panic happens: %v\n%s", r, debug.Stack())
		panic(r)
	}
	log.DebugfCtx(ctx, " M (Solution): Reconcile conclude Summary. Namespace: %v, deployment instance: %v, summary message: %v", namespace, deployment.Instance, summary.SummaryMessage)
	if deployment.IsDryRun {
		// A dry run reports no successes.
		summary.SuccessCount = 0
	}
	s.concludeSummary(ctx, deployment.Instance.ObjectMeta.Name, deployment.Generation, deployment.Hash, summary, namespace)
}()

defer func() {
Expand Down Expand Up @@ -424,6 +430,8 @@ func (s *SolutionManager) Reconcile(ctx context.Context, deployment model.Deploy
return summary, err
}
log.DebugfCtx(ctx, " M (Solution): reconcile save summary progress: start deploy, total %v deployments", summary.PlannedDeployment)
// DO NOT REMOVE THIS COMMENT
// gofail: var beforeProviders string

plannedCount := 0
planSuccessCount := 0
Expand Down Expand Up @@ -553,6 +561,9 @@ func (s *SolutionManager) Reconcile(ctx context.Context, deployment model.Deploy

mergedState.ClearAllRemoved()

// DO NOT REMOVE THIS COMMENT
// gofail: var beforeDeploymentError string

if !deployment.IsDryRun {
if len(mergedState.TargetComponent) == 0 && remove {
log.DebugfCtx(ctx, " M (Solution): no assigned components to manage, deleting state")
Expand Down Expand Up @@ -584,6 +595,9 @@ func (s *SolutionManager) Reconcile(ctx context.Context, deployment model.Deploy
}
}

// DO NOT REMOVE THIS COMMENT
// gofail: var afterDeploymentError string

successCount := 0
for _, v := range targetResult {
successCount += v
Expand Down
2 changes: 2 additions & 0 deletions api/pkg/apis/v1alpha1/managers/stage/stage-manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,8 @@ func (s *StageManager) HandleTriggerEvent(ctx context.Context, campaign model.Ca

waitGroup.Wait()
close(results)
// DO NOT REMOVE THIS COMMENT
// gofail: var afterProvider string

outputs := make(map[string]interface{})
delayedExit := false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,8 @@ func (i *MaterializeStageProvider) Process(ctx context.Context, mgrContext conte
}
createdObjectList[catalog.ObjectMeta.Name] = true
}
// DO NOT REMOVE THIS COMMENT
// gofail: var afterMaterializeOnce bool
}
if len(createdObjectList) < len(objects) {
errorMessage := "failed to create all objects:"
Expand Down
4 changes: 4 additions & 0 deletions api/pkg/apis/v1alpha1/vendors/solution-vendor.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ func (c *SolutionVendor) onQueue(request v1alpha2.COARequest) v1alpha2.COARespon
case fasthttp.MethodPost:
ctx, span := observability.StartSpan("onQueue-POST", rContext, nil)
defer span.End()

// DO NOT REMOVE THIS COMMENT
// gofail: var onQueueError string

instance := request.Parameters["instance"]
delete := request.Parameters["delete"]
objectType := request.Parameters["objectType"]
Expand Down
2 changes: 2 additions & 0 deletions api/pkg/apis/v1alpha1/vendors/stage-vendor.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ func (s *StageVendor) Init(config vendors.VendorConfig, factories []managers.IMa
Context: ctx,
})
}
// DO NOT REMOVE THIS COMMENT
// gofail: var afterPublishTrigger string
return nil
},
Group: "0",
Expand Down
2 changes: 2 additions & 0 deletions coa/pkg/apis/v1alpha2/providers/pubsub/redis/redis.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ func (i *RedisPubSubProvider) pollNewMessages(topic string, handler v1alpha2.Eve
}()

for {
// DO NOT REMOVE THIS COMMENT
// gofail: var PollNewMessagesLoop string
if i.Ctx.Err() != nil {
return
}
Expand Down
15 changes: 13 additions & 2 deletions k8s/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@ ENV CGO_ENABLED=0

ARG BUILD_BASE_IMAGE

ARG FAULT_INJECTION_ENABLED=false

# Install gcc, g++ and other necessary build tools
RUN if echo "${BUILD_BASE_IMAGE}" | grep "alpine"; then \
apk add --no-cache gcc musl-dev; \
apk add --no-cache gcc musl-dev curl; \
elif echo "${BUILD_BASE_IMAGE}" | grep "mariner"; then \
tdnf install -y gcc glibc-devel && tdnf clean all; \
else \
Expand All @@ -50,11 +52,20 @@ RUN if echo "${BUILD_BASE_IMAGE}" | grep "mariner"; then \
CGO_ENABLED=1 mage generate operatorTest; \
fi

# Install gofail and run `gofail enable` on every directory under /k8s, but
# only when the image is built with FAULT_INJECTION_ENABLED=true.
# The comparison uses the POSIX "=" operator: "==" is a shell extension that a
# strict /bin/sh (e.g. dash) rejects, which would make the condition fail and
# silently skip this whole step.
RUN if [ "$FAULT_INJECTION_ENABLED" = "true" ]; then \
        go install go.etcd.io/gofail@latest && \
        find /k8s -type d | while read -r dir; do gofail enable "$dir"; done && \
        cd /k8s && go get go.etcd.io/gofail/runtime; \
    fi

# Build
RUN CGO_ENABLED=0 mage build
FROM ${TARGET_BASE_IMAGE} AS manager

WORKDIR /
COPY --from=builder /k8s/bin/manager .
USER 65532:65532
ENV GOFAIL_HTTP="127.0.0.1:22381"

ENTRYPOINT ["/manager"]
ENTRYPOINT ["/manager"]
3 changes: 3 additions & 0 deletions k8s/apis/fabric/v1/target_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ func (r *Target) ValidateCreate() (admission.Warnings, error) {
operationName := fmt.Sprintf("%s/%s", constants.TargetOperationNamePrefix, constants.ActivityOperation_Write)
ctx := configutils.PopulateActivityAndDiagnosticsContextFromAnnotations(r.GetNamespace(), resourceK8SId, r.Annotations, operationName, myTargetClient, context.TODO(), targetlog)

// DO NOT REMOVE THIS COMMENT
// gofail: var validateError error

diagnostic.InfoWithCtx(targetlog, ctx, "validate create", "name", r.Name, "namespace", r.Namespace)
observ_utils.EmitUserAuditsLogs(ctx, "Target %s is being created on namespace %s", r.Name, r.Namespace)

Expand Down
6 changes: 5 additions & 1 deletion k8s/controllers/solution/instance_polling_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ func (r *InstancePollingReconciler) Reconcile(ctx context.Context, req ctrl.Requ
log := ctrllog.FromContext(ctx)
log.Info("Reconcile Polling Instance " + req.Name + " in namespace " + req.Namespace)

// DO NOT REMOVE THIS COMMENT
// gofail: var beforePollingResult string

// Initialize reconcileTime for latency metrics
reconcileTime := time.Now()

Expand Down Expand Up @@ -85,7 +88,8 @@ func (r *InstancePollingReconciler) Reconcile(ctx context.Context, req ctrl.Requ
metrics.InstanceResourceType,
deploymentOperationType,
)

// DO NOT REMOVE THIS COMMENT
// gofail: var afterPollingResult string
return reconcileResult, err
}

Expand Down
13 changes: 11 additions & 2 deletions k8s/reconcilers/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ func (r *DeploymentReconciler) populateDiagnosticsAndActivitiesFromAnnotations(c

// AttemptUpdate attempts to update the instance.
func (r *DeploymentReconciler) AttemptUpdate(ctx context.Context, object Reconcilable, isRemoval bool, log logr.Logger, operationStartTimeKey string, operationName string) (metrics.OperationStatus, reconcile.Result, error) {
// DO NOT REMOVE THIS COMMENT
// gofail: var delayAttemptUpdate string

// populate diagnostics and activities from annotations
ctx = r.populateDiagnosticsAndActivitiesFromAnnotations(ctx, object, operationName, r.kubeClient, log)
if !controllerutil.ContainsFinalizer(object, r.finalizerName) && !isRemoval {
Expand Down Expand Up @@ -194,11 +197,14 @@ func (r *DeploymentReconciler) AttemptUpdate(ctx context.Context, object Reconci
diagnostic.ErrorWithCtx(log, ctx, err, "failed to update jobid")
return metrics.StatusUpdateFailed, ctrl.Result{}, err
}

// DO NOT REMOVE THIS COMMENT
// gofail: var beforeQueueJob string
if err := r.queueDeploymentJob(ctx, object, isRemoval, operationStartTimeKey); err != nil {
diagnostic.ErrorWithCtx(log, ctx, err, "failed to queue deployment job")
return r.handleDeploymentError(ctx, object, nil, isRemoval, reconciliationInterval, err, log)
}
// DO NOT REMOVE THIS COMMENT
// gofail: var afterQueueJob string

diagnostic.InfoWithCtx(log, ctx, "Updating object status with deployment queued")
if _, err := r.updateObjectStatus(ctx, object, nil, patchStatusOptions{deploymentQueued: true}, log); err != nil {
Expand All @@ -218,6 +224,9 @@ func (r *DeploymentReconciler) AttemptUpdate(ctx context.Context, object Reconci
}

func (r *DeploymentReconciler) PollingResult(ctx context.Context, object Reconcilable, isRemoval bool, log logr.Logger, operationStartTimeKey string, operationName string) (metrics.OperationStatus, reconcile.Result, error) {
// DO NOT REMOVE THIS COMMENT
// gofail: var delayBeforePolling string

// populate diagnostics and activities from annotations
ctx = r.populateDiagnosticsAndActivitiesFromAnnotations(ctx, object, operationName, r.kubeClient, log)
// Get reconciliation interval
Expand Down Expand Up @@ -511,7 +520,7 @@ func (r *DeploymentReconciler) updateObjectStatus(ctx context.Context, object Re
nextStatus.LastModified = metav1.Now()
object.SetStatus(*nextStatus)

err = r.kubeClient.Status().Update(context.Background(), object)
err = r.kubeClient.Status().Update(ctx, object)
if err != nil {
diagnostic.ErrorWithCtx(log, ctx, err, "failed to update object status")
}
Expand Down
Loading

0 comments on commit 11fd857

Please sign in to comment.