Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automate Fault tests #544

Merged
merged 10 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions .github/workflows/fault.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# This workflow builds the fault-injection-enabled Symphony images, brings up a
# local minikube cluster, and runs the fault integration tests against it.
# For more information on Go workflows see:
# https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go

name: fault

on:
  push:
    branches:
      - main
      - 'release/**'
  pull_request:
    branches:
      - main
      - 'release/**'
  workflow_dispatch:

env:
  ContainerRegistry: "ghcr.io"
  ContainerRegistryRepo: "ghcr.io/eclipse-symphony"

jobs:

  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          # Quoted so YAML can never reinterpret the version as a number.
          go-version: "1.22.4"

      - name: Set up custom GOPATH
        # Every `run` step starts a fresh non-interactive shell that does not
        # read ~/.bashrc, so exports written there never reach later steps.
        # GITHUB_ENV / GITHUB_PATH are the supported way to persist them.
        run: |
          mkdir -p /home/runner/go
          echo "GOPATH=/home/runner/go" >> "$GITHUB_ENV"
          echo "/home/runner/go/bin" >> "$GITHUB_PATH"

      - name: Install make
        run: sudo apt-get update && sudo apt-get install -y build-essential

      - name: Check docker version and images
        run: docker --version && docker images

      - name: Install kubectl
        run: |
          curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          chmod +x kubectl
          sudo mv ./kubectl /usr/local/bin/kubectl
          kubectl version --client
          kubectl config view

      - name: Install Helm
        run: |
          curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
          chmod 700 get_helm.sh
          ./get_helm.sh

      - name: Install minikube
        # Also starts the cluster so later steps have a kube context.
        run: |
          curl -Lo minikube https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
          chmod +x minikube
          sudo mv minikube /usr/local/bin/
          minikube start
          kubectl config view

      - name: Install Mage
        # Cloned next to the checkout so the mage sources do not pollute the repo tree.
        run: |
          cd ..
          git clone https://github.com/magefile/mage
          cd mage
          go run bootstrap.go
          cd ..

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.ContainerRegistry }}
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build docker images
        run: |
          cd test/localenv/
          mage build:apifault
          mage build:k8sfault
          mage cluster:up

      - name: Go work init
        run: |
          mv go.work.bk go.work

      - name: Run fault tests
        run: |
          cd test/integration/scenarios/faultTests/ && mage faulttests

      - name: Collect and upload symphony logs
        # Runs even when the tests fail so the logs are available for triage;
        # an upload problem must not change the job result.
        uses: actions/upload-artifact@v4
        with:
          name: symphony-logs
          path: |
            /tmp/symphony-integration-test-logs/**/*.log
        continue-on-error: true
        if: always()







12 changes: 11 additions & 1 deletion api/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ ARG TARGETPLATFORM
ARG BUILDPLATFORM
ARG TARGETOS
ARG TARGETARCH
ARG FAULT_INJECTION_ENABLED=false

ENV CGO_ENABLED=0

Expand All @@ -23,6 +24,14 @@ COPY ./api /workspace/api
WORKDIR /workspace/api
# File permissions are not preserved when copying files in ADO.
RUN chmod +x pkg/apis/v1alpha1/providers/target/script/mock-*.sh

# Install gofail
RUN if [ "$FAULT_INJECTION_ENABLED" == "true" ]; then \
go install go.etcd.io/gofail@latest && \
find /workspace/api -type d | while read -r dir; do gofail enable $dir; done && \
find /workspace/coa -type d | while read -r dir; do gofail enable $dir; done && \
cd /workspace/api && go get go.etcd.io/gofail/runtime; \
fi
RUN CGO_ENABLED=${CGO_ENABLED} GOOS=${TARGETOS} GOARCH=${TARGETARCH} GODEBUG=netdns=cgo go build -o /dist/symphony-api

FROM ${TARGET_BASE_IMAGE}
Expand Down Expand Up @@ -51,5 +60,6 @@ ADD ./api/symphony-api.json /
EXPOSE 8080
EXPOSE 8081
ENV LOG_LEVEL=Debug
ENV GOFAIL_HTTP="127.0.0.1:22381"
# ENV CONFIG /symphony-api.json
CMD sh -c 'if [ -f /etc/pki/ca-trust/source/anchors/proxy-cert.crt ]; then update-ca-trust; fi && exec /symphony-api -c $CONFIG -l $LOG_LEVEL'
CMD sh -c 'if [ -f /etc/pki/ca-trust/source/anchors/proxy-cert.crt ]; then update-ca-trust; fi && exec /symphony-api -c $CONFIG -l $LOG_LEVEL'
22 changes: 18 additions & 4 deletions api/pkg/apis/v1alpha1/managers/solution/solution-manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"errors"
"fmt"
"os"
"runtime/debug"
"strconv"
"strings"
"time"
Expand Down Expand Up @@ -330,11 +331,16 @@ func (s *SolutionManager) Reconcile(ctx context.Context, deployment model.Deploy
return summary, err
}
defer func() {
log.DebugfCtx(ctx, " M (Solution): Reconcile conclude Summary. Namespace: %v, deployment instance: %v, summary message: %v", namespace, deployment.Instance, summary.SummaryMessage)
if deployment.IsDryRun {
summary.SuccessCount = 0
if r := recover(); r == nil {
log.DebugfCtx(ctx, " M (Solution): Reconcile conclude Summary. Namespace: %v, deployment instance: %v, summary message: %v", namespace, deployment.Instance, summary.SummaryMessage)
msftcoderdjw marked this conversation as resolved.
Show resolved Hide resolved
if deployment.IsDryRun {
summary.SuccessCount = 0
}
s.concludeSummary(ctx, deployment.Instance.ObjectMeta.Name, deployment.Generation, deployment.Hash, summary, namespace)
} else {
log.ErrorfCtx(ctx, " M (Solution): panic happens: %v", debug.Stack())
panic(r)
}
s.concludeSummary(ctx, deployment.Instance.ObjectMeta.Name, deployment.Generation, deployment.Hash, summary, namespace)
}()

defer func() {
Expand Down Expand Up @@ -424,6 +430,8 @@ func (s *SolutionManager) Reconcile(ctx context.Context, deployment model.Deploy
return summary, err
}
log.DebugfCtx(ctx, " M (Solution): reconcile save summary progress: start deploy, total %v deployments", summary.PlannedDeployment)
// DO NOT REMOVE THIS COMMENT
// gofail: var beforeProviders string

plannedCount := 0
planSuccessCount := 0
Expand Down Expand Up @@ -553,6 +561,9 @@ func (s *SolutionManager) Reconcile(ctx context.Context, deployment model.Deploy

mergedState.ClearAllRemoved()

// DO NOT REMOVE THIS COMMENT
// gofail: var beforeDeploymentError string

if !deployment.IsDryRun {
if len(mergedState.TargetComponent) == 0 && remove {
log.DebugfCtx(ctx, " M (Solution): no assigned components to manage, deleting state")
Expand Down Expand Up @@ -584,6 +595,9 @@ func (s *SolutionManager) Reconcile(ctx context.Context, deployment model.Deploy
}
}

// DO NOT REMOVE THIS COMMENT
// gofail: var afterDeploymentError string

successCount := 0
for _, v := range targetResult {
successCount += v
Expand Down
2 changes: 2 additions & 0 deletions api/pkg/apis/v1alpha1/managers/stage/stage-manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,8 @@ func (s *StageManager) HandleTriggerEvent(ctx context.Context, campaign model.Ca

waitGroup.Wait()
close(results)
// DO NOT REMOVE THIS COMMENT
// gofail: var afterProvider string
RemindD marked this conversation as resolved.
Show resolved Hide resolved

outputs := make(map[string]interface{})
delayedExit := false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,8 @@ func (i *MaterializeStageProvider) Process(ctx context.Context, mgrContext conte
}
createdObjectList[catalog.ObjectMeta.Name] = true
}
// DO NOT REMOVE THIS COMMENT
// gofail: var afterMaterializeOnce bool
}
if len(createdObjectList) < len(objects) {
errorMessage := "failed to create all objects:"
Expand Down
4 changes: 4 additions & 0 deletions api/pkg/apis/v1alpha1/vendors/solution-vendor.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ func (c *SolutionVendor) onQueue(request v1alpha2.COARequest) v1alpha2.COARespon
case fasthttp.MethodPost:
ctx, span := observability.StartSpan("onQueue-POST", rContext, nil)
defer span.End()

// DO NOT REMOVE THIS COMMENT
// gofail: var onQueueError string

instance := request.Parameters["instance"]
delete := request.Parameters["delete"]
objectType := request.Parameters["objectType"]
Expand Down
2 changes: 2 additions & 0 deletions api/pkg/apis/v1alpha1/vendors/stage-vendor.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ func (s *StageVendor) Init(config vendors.VendorConfig, factories []managers.IMa
Context: ctx,
})
}
// DO NOT REMOVE THIS COMMENT
// gofail: var afterPublishTrigger string
return nil
},
Group: "0",
Expand Down
2 changes: 2 additions & 0 deletions coa/pkg/apis/v1alpha2/providers/pubsub/redis/redis.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ func (i *RedisPubSubProvider) pollNewMessages(topic string, handler v1alpha2.Eve
}()

for {
// DO NOT REMOVE THIS COMMENT
// gofail: var PollNewMessagesLoop string
if i.Ctx.Err() != nil {
return
}
Expand Down
15 changes: 13 additions & 2 deletions k8s/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@ ENV CGO_ENABLED=0

ARG BUILD_BASE_IMAGE

ARG FAULT_INJECTION_ENABLED=false

# Install gcc, g++ and other necessary build tools
RUN if echo "${BUILD_BASE_IMAGE}" | grep "alpine"; then \
apk add --no-cache gcc musl-dev; \
apk add --no-cache gcc musl-dev curl; \
elif echo "${BUILD_BASE_IMAGE}" | grep "mariner"; then \
tdnf install -y gcc glibc-devel && tdnf clean all; \
else \
Expand All @@ -50,11 +52,20 @@ RUN if echo "${BUILD_BASE_IMAGE}" | grep "mariner"; then \
CGO_ENABLED=1 mage generate operatorTest; \
fi

# Install gofail
RUN if [ "$FAULT_INJECTION_ENABLED" == "true" ]; then \
go install go.etcd.io/gofail@latest && \
find /k8s -type d | while read -r dir; do gofail enable $dir; done && \
cd /k8s && go get go.etcd.io/gofail/runtime; \
fi

# Build
RUN CGO_ENABLED=0 mage build
FROM ${TARGET_BASE_IMAGE} AS manager

WORKDIR /
COPY --from=builder /k8s/bin/manager .
USER 65532:65532
ENV GOFAIL_HTTP="127.0.0.1:22381"
RemindD marked this conversation as resolved.
Show resolved Hide resolved

ENTRYPOINT ["/manager"]
ENTRYPOINT ["/manager"]
3 changes: 3 additions & 0 deletions k8s/apis/fabric/v1/target_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ func (r *Target) ValidateCreate() (admission.Warnings, error) {
operationName := fmt.Sprintf("%s/%s", constants.TargetOperationNamePrefix, constants.ActivityOperation_Write)
ctx := configutils.PopulateActivityAndDiagnosticsContextFromAnnotations(r.GetNamespace(), resourceK8SId, r.Annotations, operationName, myTargetClient, context.TODO(), targetlog)

// DO NOT REMOVE THIS COMMENT
// gofail: var validateError error

diagnostic.InfoWithCtx(targetlog, ctx, "validate create", "name", r.Name, "namespace", r.Namespace)
observ_utils.EmitUserAuditsLogs(ctx, "Target %s is being created on namespace %s", r.Name, r.Namespace)

Expand Down
6 changes: 5 additions & 1 deletion k8s/controllers/solution/instance_polling_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ func (r *InstancePollingReconciler) Reconcile(ctx context.Context, req ctrl.Requ
log := ctrllog.FromContext(ctx)
log.Info("Reconcile Polling Instance " + req.Name + " in namespace " + req.Namespace)

// DO NOT REMOVE THIS COMMENT
// gofail: var beforePollingResult string

// Initialize reconcileTime for latency metrics
reconcileTime := time.Now()

Expand Down Expand Up @@ -85,7 +88,8 @@ func (r *InstancePollingReconciler) Reconcile(ctx context.Context, req ctrl.Requ
metrics.InstanceResourceType,
deploymentOperationType,
)

// DO NOT REMOVE THIS COMMENT
// gofail: var afterPollingResult string
return reconcileResult, err
}

Expand Down
13 changes: 11 additions & 2 deletions k8s/reconcilers/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ func (r *DeploymentReconciler) populateDiagnosticsAndActivitiesFromAnnotations(c

// attemptUpdate attempts to update the instance
func (r *DeploymentReconciler) AttemptUpdate(ctx context.Context, object Reconcilable, isRemoval bool, log logr.Logger, operationStartTimeKey string, operationName string) (metrics.OperationStatus, reconcile.Result, error) {
// DO NOT REMOVE THIS COMMENT
// gofail: var delayAttemptUpdate string

// populate diagnostics and activities from annotations
ctx = r.populateDiagnosticsAndActivitiesFromAnnotations(ctx, object, operationName, r.kubeClient, log)
if !controllerutil.ContainsFinalizer(object, r.finalizerName) && !isRemoval {
Expand Down Expand Up @@ -194,11 +197,14 @@ func (r *DeploymentReconciler) AttemptUpdate(ctx context.Context, object Reconci
diagnostic.ErrorWithCtx(log, ctx, err, "failed to update jobid")
return metrics.StatusUpdateFailed, ctrl.Result{}, err
}

// DO NOT REMOVE THIS COMMENT
// gofail: var beforeQueueJob string
if err := r.queueDeploymentJob(ctx, object, isRemoval, operationStartTimeKey); err != nil {
diagnostic.ErrorWithCtx(log, ctx, err, "failed to queue deployment job")
return r.handleDeploymentError(ctx, object, nil, isRemoval, reconciliationInterval, err, log)
}
// DO NOT REMOVE THIS COMMENT
// gofail: var afterQueueJob string

diagnostic.InfoWithCtx(log, ctx, "Updating object status with deployment queued")
if _, err := r.updateObjectStatus(ctx, object, nil, patchStatusOptions{deploymentQueued: true}, log); err != nil {
Expand All @@ -218,6 +224,9 @@ func (r *DeploymentReconciler) AttemptUpdate(ctx context.Context, object Reconci
}

func (r *DeploymentReconciler) PollingResult(ctx context.Context, object Reconcilable, isRemoval bool, log logr.Logger, operationStartTimeKey string, operationName string) (metrics.OperationStatus, reconcile.Result, error) {
// DO NOT REMOVE THIS COMMENT
// gofail: var delayBeforePolling string

// populate diagnostics and activities from annotations
ctx = r.populateDiagnosticsAndActivitiesFromAnnotations(ctx, object, operationName, r.kubeClient, log)
// Get reconciliation interval
Expand Down Expand Up @@ -511,7 +520,7 @@ func (r *DeploymentReconciler) updateObjectStatus(ctx context.Context, object Re
nextStatus.LastModified = metav1.Now()
object.SetStatus(*nextStatus)

err = r.kubeClient.Status().Update(context.Background(), object)
err = r.kubeClient.Status().Update(ctx, object)
if err != nil {
diagnostic.ErrorWithCtx(log, ctx, err, "failed to update object status")
}
Expand Down
Loading
Loading