diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 43566691e..ff485b4ad 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,7 +26,7 @@ env: jobs: build: - name: Build and Unit Test + name: Build and Test runs-on: ubuntu-latest steps: - name: Checkout repository diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 91214ced8..ca32b0ca3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,12 +1,7 @@ name: E2E Tests -concurrency: - group: test-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - on: - pull_request_target: - types: [labeled] + pull_request: branches: - main - release-* @@ -15,31 +10,91 @@ on: - '**.md' env: GO_VERSION: '1.22' + AWS_REGION: us-west-2 + AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_SECRET_ACCESS_KEY }} jobs: e2etest: + concurrency: + group: test-e2e-${{ github.head_ref || github.run_id }} + cancel-in-progress: true name: E2E Tests runs-on: ubuntu-latest - if: contains(github.event.pull_request.labels.*.name, 'test-e2e') - env: - AWS_REGION: us-west-2 - AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_SECRET_ACCESS_KEY }} + outputs: + clustername: ${{ steps.vars.outputs.clustername }} + version: ${{ steps.vars.outputs.version }} steps: - name: Checkout repository uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Setup Go uses: actions/setup-go@v5 with: - go-version: ${{ env.GO_VERSION }} + go-version: ${{ env.GO_VERSION }} + - name: Set up Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to GHCR + uses: docker/login-action@v3.3.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Get outputs + id: vars + run: | + echo "version=$(git describe --tags --always)" >> $GITHUB_OUTPUT + echo "clustername=ci-$(date +%s)-e2e-test" >> $GITHUB_OUTPUT + - 
name: Build and push HMC controller image + uses: docker/build-push-action@v6 + with: + build-args: | + LD_FLAGS=-s -w -X github.com/Mirantis/hmc/internal/build.Version=${{ steps.vars.outputs.version }} + context: . + platforms: linux/amd64,linux/arm64 + tags: | + ghcr.io/mirantis/hmc/controller-ci:${{ steps.vars.outputs.version }} + push: true + cache-from: type=gha + cache-to: type=gha,mode=max + - name: Prepare and push HMC template charts + run: | + make hmc-chart-release + REGISTRY_REPO="oci://ghcr.io/mirantis/hmc/charts-ci" make helm-push - name: Setup kubectl uses: azure/setup-kubectl@v4 - name: Run E2E tests + env: + MANAGED_CLUSTER_NAME: '${{ steps.vars.outputs.clustername }}' # same name the cleanup job nukes by + REGISTRY_REPO: 'oci://ghcr.io/mirantis/hmc/charts-ci' + IMG: 'ghcr.io/mirantis/hmc/controller-ci:${{ steps.vars.outputs.version }}' run: | make test-e2e - name: Archive test results + if: ${{ failure() }} uses: actions/upload-artifact@v4 with: - name: test-logs - path: | - test/e2e/*.log + name: test-logs + path: | + test/e2e/*.log + cleanup: + name: Cleanup + needs: e2etest + runs-on: ubuntu-latest + if: ${{ always() && needs.e2etest.outputs.clustername != '' }} # never nuke with an empty CLUSTER_NAME + timeout-minutes: 15 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + - name: AWS Test Resources + env: + CLUSTER_NAME: '${{ needs.e2etest.outputs.clustername }}' + run: | + make dev-aws-nuke diff --git a/Makefile b/Makefile index 396c640ce..0cdc59041 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,8 @@ NAMESPACE ?= hmc-system VERSION ?= $(shell git describe --tags --always) # Image URL to use all building/pushing image targets IMG ?= hmc/controller:latest +IMG_REPO = $(shell echo $(IMG) | cut -d: -f1) +IMG_TAG = $(shell echo $(IMG) | cut -d: -f2) # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. ENVTEST_K8S_VERSION = 1.29.0 @@ -103,10 +105,11 @@ tidy: test: generate-all fmt vet envtest tidy external-crd ## Run tests. 
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out -# Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors. -.PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up. +# Utilize Kind or modify the e2e tests to load the image locally, enabling +# compatibility with other vendors. +.PHONY: test-e2e # Run the e2e tests using a Kind k8s instance as the management cluster. test-e2e: cli-install - KIND_CLUSTER_NAME="hmc-test" KIND_VERSION=$(KIND_VERSION) go test ./test/e2e/ -v -ginkgo.v -timeout=2h + KIND_CLUSTER_NAME="hmc-test" KIND_VERSION=$(KIND_VERSION) go test ./test/e2e/ -v -ginkgo.v -timeout=2h .PHONY: lint lint: golangci-lint ## Run golangci-lint linter & yamllint @@ -240,6 +243,13 @@ hmc-deploy: helm .PHONY: dev-deploy dev-deploy: ## Deploy HMC helm chart to the K8s cluster specified in ~/.kube/config. + @$(YQ) eval -i '.image.repository = "$(IMG_REPO)"' config/dev/hmc_values.yaml + @$(YQ) eval -i '.image.tag = "$(IMG_TAG)"' config/dev/hmc_values.yaml + @if [ "$(REGISTRY_REPO)" = "oci://127.0.0.1:$(REGISTRY_PORT)/charts" ]; then \ + $(YQ) eval -i '.controller.defaultRegistryURL = "oci://$(REGISTRY_NAME):5000/charts"' config/dev/hmc_values.yaml; \ + else \ + $(YQ) eval -i '.controller.defaultRegistryURL = "$(REGISTRY_REPO)"' config/dev/hmc_values.yaml; \ + fi; \ $(MAKE) hmc-deploy HMC_VALUES=config/dev/hmc_values.yaml $(KUBECTL) rollout restart -n $(NAMESPACE) deployment/hmc-controller-manager @@ -313,21 +323,22 @@ dev-provider-delete: envsubst .PHONY: dev-creds-apply dev-creds-apply: dev-$(DEV_PROVIDER)-creds -.PHONY: envsubst awscli dev-aws-nuke +.PHONY: envsubst awscli yq dev-aws-nuke -dev-aws-nuke: ## Warning: Destructive! Nuke all AWS resources deployed by 'DEV_PROVIDER=aws dev-provider-apply', prefix with CLUSTER_NAME to nuke a specific cluster. +dev-aws-nuke: envsubst awscli yq cloud-nuke ## Warning: Destructive! Nuke all AWS resources deployed by 'DEV_PROVIDER=aws dev-provider-apply', prefix with CLUSTER_NAME to nuke a specific cluster. 
+ @CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) AWSCLI=$(AWSCLI) bash -c "./scripts/aws-nuke-ccm.sh elb" @CLUSTER_NAME=$(CLUSTER_NAME) $(ENVSUBST) < config/dev/cloud_nuke.yaml.tpl > config/dev/cloud_nuke.yaml - DISABLE_TELEMETRY=true $(CLOUDNUKE) aws --region $$AWS_REGION --force --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2-subnet,elb,elbv2,internet-gateway,network-interface,security-group + DISABLE_TELEMETRY=true $(CLOUDNUKE) aws --region $$AWS_REGION --force --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2,ec2-subnet,elb,elbv2,ebs,internet-gateway,network-interface,security-group @rm config/dev/cloud_nuke.yaml - @CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) AWSCLI=$(AWSCLI) bash -c ./scripts/aws-nuke-ccm.sh + @CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) AWSCLI=$(AWSCLI) bash -c "./scripts/aws-nuke-ccm.sh ebs" .PHONY: test-apply -test-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates +test-apply: kind-deploy dev-deploy dev-templates .PHONY: test-destroy -test-destroy: kind-undeploy registry-undeploy +test-destroy: kind-undeploy .PHONY: cli-install -cli-install: clusterawsadm clusterctl cloud-nuke yq awscli ## Install the necessary CLI tools for deployment, development and testing. +cli-install: clusterawsadm clusterctl cloud-nuke envsubst yq awscli ## Install the necessary CLI tools for deployment, development and testing. 
##@ Dependencies @@ -452,9 +463,21 @@ $(ENVSUBST): | $(LOCALBIN) .PHONY: awscli awscli: $(AWSCLI) $(AWSCLI): | $(LOCALBIN) - curl "https://awscli.amazonaws.com/awscli-exe-$(OS)-$(shell uname -m)-$(AWSCLI_VERSION).zip" -o "/tmp/awscliv2.zip" - unzip /tmp/awscliv2.zip -d /tmp - /tmp/aws/install -i $(LOCALBIN)/aws-cli -b $(LOCALBIN) --update + @if [ "$(OS)" = "linux" ]; then \ + curl "https://awscli.amazonaws.com/awscli-exe-linux-$(shell uname -m)-$(AWSCLI_VERSION).zip" -o "/tmp/awscliv2.zip"; \ + unzip /tmp/awscliv2.zip -d /tmp; \ + /tmp/aws/install -i $(LOCALBIN)/aws-cli -b $(LOCALBIN) --update; \ + fi; \ + if [ "$(OS)" = "darwin" ]; then \ + curl "https://awscli.amazonaws.com/AWSCLIV2.pkg" -o "AWSCLIV2.pkg"; \ + installer -pkg AWSCLIV2.pkg -target $(LOCALBIN) -applyChoiceChangesXML choices.xml; \ + rm AWSCLIV2.pkg; \ + fi; \ + if [ "$(OS)" = "windows" ]; then \ + echo "Installing to $(LOCALBIN) on Windows is not yet implemented"; \ + exit 1; \ + fi + # go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist # $1 - target path with name of binary (ideally with version) diff --git a/docs/aws/hosted-control-plane.md b/docs/aws/hosted-control-plane.md index d492d7165..19ea7919c 100644 --- a/docs/aws/hosted-control-plane.md +++ b/docs/aws/hosted-control-plane.md @@ -19,7 +19,12 @@ reused with a management cluster. If you deployed your AWS Kubernetes cluster using Cluster API Provider AWS (CAPA) you can obtain all the necessary data with the commands below or use the template found below in the -[HMC ManagedCluster manifest generation](#hmc-managed-cluster-manifest-generation) section. +[HMC ManagedCluster manifest +generation](#hmc-managed-cluster-manifest-generation) section. + +If using the `aws-standalone-cp` template to deploy a hosted cluster it is +recommended to use a `t3.large` or larger instance type as the `hmc-controller` +and other provider controllers will need a large amount of resources to run. 
**VPC ID** @@ -89,7 +94,7 @@ Grab the following `ManagedCluster` manifest template and save it to a file name apiVersion: hmc.mirantis.com/v1alpha1 kind: ManagedCluster metadata: - name: aws-hosted-cp + name: aws-hosted spec: template: aws-hosted-cp config: @@ -109,3 +114,24 @@ Then run the following command to create the `managedcluster.yaml`: ``` kubectl get awscluster cluster -o go-template="$(cat managedcluster.yaml.tpl)" > managedcluster.yaml ``` +## Deployment Tips +* Ensure HMC templates and the controller image are somewhere public and + fetchable. +* For installing the HMC charts and templates from a custom repository, load + the `kubeconfig` from the cluster and run the commands: + +``` +KUBECONFIG=kubeconfig IMG="ghcr.io/mirantis/hmc/controller-ci:v0.0.1-179-ga5bdf29" REGISTRY_REPO="oci://ghcr.io/mirantis/hmc/charts-ci" make dev-apply +KUBECONFIG=kubeconfig make dev-templates +``` +* The infrastructure will need to be manually marked `Ready` to get the + `MachineDeployment` to scale up. You can patch the `AWSCluster` kind using + the command: + +``` +KUBECONFIG=kubeconfig kubectl patch AWSCluster "$CLUSTER_NAME" --type=merge --subresource status --patch 'status: {ready: true}' -n hmc-system +``` + +For additional information on why this is required [click here](https://docs.k0smotron.io/stable/capi-aws/#:~:text=As%20we%20are%20using%20self%2Dmanaged%20infrastructure%20we%20need%20to%20manually%20mark%20the%20infrastructure%20ready.%20This%20can%20be%20accomplished%20using%20the%20following%20command). + + diff --git a/docs/dev.md b/docs/dev.md index 848959399..b736f93dd 100644 --- a/docs/dev.md +++ b/docs/dev.md @@ -83,3 +83,34 @@ export KUBECONFIG=~/.kube/config kubectl --kubeconfig ~/.kube/config get secret -n hmc-system -kubeconfig -o=jsonpath={.data.value} | base64 -d > kubeconfig ``` +## Running E2E tests locally +E2E tests can be run locally via the `make test-e2e` target. 
In order to have +CI properly deploy, a non-local registry will need to be used and the Helm charts +and hmc-controller image will need to exist on the registry, for example, using +GHCR: + +``` +IMG="ghcr.io/mirantis/hmc/controller-ci:v0.0.1-179-ga5bdf29" \ + REGISTRY_REPO="oci://ghcr.io/mirantis/hmc/charts-ci" \ + make test-e2e +``` + +Optionally, the `NO_CLEANUP=1` env var can be used to disable `After` nodes from +running within some specs. This allows users to debug tests by re-running +them without the need to wait a while for an infrastructure deployment to occur. +For subsequent runs the `MANAGED_CLUSTER_NAME=` env var should be +passed to tell the test what cluster name to use so that it does not try to +generate a new name and deploy a new cluster. + +Tests that run locally use autogenerated names like `12345678-e2e-test` while +tests that run in CI use names such as `ci-1234567890-e2e-test`. You can always +pass `MANAGED_CLUSTER_NAME=` from the get-go to customize the name used by the +test. + +### Nuke created resources +In CI we run `make dev-aws-nuke` to clean up test resources; you can do so +manually with: + +``` +CLUSTER_NAME=example-e2e-test make dev-aws-nuke +``` diff --git a/scripts/aws-nuke-ccm.sh b/scripts/aws-nuke-ccm.sh index 26e8a067c..df8c80d5b 100755 --- a/scripts/aws-nuke-ccm.sh +++ b/scripts/aws-nuke-ccm.sh @@ -33,28 +33,30 @@ if [ -z $AWSCLI ]; then exit 1 fi -echo "Checking for ELB with 'kubernetes.io/cluster/$CLUSTER_NAME' tag" -for LOADBALANCER in $($AWSCLI elb describe-load-balancers --output yaml | $YQ '.LoadBalancerDescriptions[].LoadBalancerName'); -do - echo "Checking ELB: $LOADBALANCER for 'kubernetes.io/cluster/$CLUSTER_NAME tag" - DESCRIBE_TAGS=$($AWSCLI elb describe-tags \ - --load-balancer-names $LOADBALANCER \ - --output yaml | $YQ '.TagDescriptions[].Tags.[]' | grep 'kubernetes.io/cluster/$CLUSTER_NAME') - if [ ! 
-z "${DESCRIBE_TAGS}" ]; then - echo "Deleting ELB: $LOADBALANCER" - $AWSCLI elb delete-load-balancer --load-balancer-name $LOADBALANCER - fi -done +if [ "$1" == "elb" ]; then + echo "Checking for ELB with '$CLUSTER_NAME' tag" + for LOADBALANCER in $($AWSCLI elb describe-load-balancers --output yaml | $YQ '.LoadBalancerDescriptions[].LoadBalancerName'); + do + echo "Checking ELB: $LOADBALANCER for tag" + DESCRIBE_TAGS=$($AWSCLI elb describe-tags --load-balancer-names $LOADBALANCER --output yaml | $YQ '.TagDescriptions[]' | grep -F -- "$CLUSTER_NAME") # fixed-string match: the cluster name is not a regex + if [ ! -z "${DESCRIBE_TAGS}" ]; then + echo "Deleting ELB: $LOADBALANCER" + $AWSCLI elb delete-load-balancer --load-balancer-name $LOADBALANCER + fi + done +fi -echo "Checking for EBS Volumes with $CLUSTER_NAME within the 'kubernetes.io/created-for/pvc/name' tag" -for VOLUME in $($AWSCLI ec2 describe-volumes --output yaml | $YQ '.Volumes[].VolumeId'); -do - echo "Checking EBS Volume: $VOLUME for $CLUSTER_NAME claim" - DESCRIBE_VOLUMES=$($AWSCLI ec2 describe-volumes \ - --volume-id $VOLUME \ - --output yaml | $YQ '.Volumes | to_entries[] | .value.Tags[] | select(.Key == "kubernetes.io/created-for/pvc/name")' | grep $CLUSTER_NAME) - if [ ! -z "${DESCRIBE_VOLUMES}" ]; then - echo "Deleting EBS Volume: $VOLUME" - $AWSCLI ec2 delete-volume --volume-id $VOLUME - fi -done +if [ "$1" == "ebs" ]; then + echo "Checking for EBS Volumes with '$CLUSTER_NAME' within the 'kubernetes.io/created-for/pvc/name' tag" + for VOLUME in $($AWSCLI ec2 describe-volumes --output yaml | $YQ '.Volumes[].VolumeId'); + do + echo "Checking EBS Volume: $VOLUME for $CLUSTER_NAME claim" + DESCRIBE_VOLUMES=$($AWSCLI ec2 describe-volumes \ + --volume-id $VOLUME \ + --output yaml | $YQ '.Volumes | to_entries[] | .value.Tags[] | select(.Key == "kubernetes.io/created-for/pvc/name")' | grep -F -- "$CLUSTER_NAME") # fixed-string match: the cluster name is not a regex + if [ ! 
-z "${DESCRIBE_VOLUMES}" ]; then + echo "Deleting EBS Volume: $VOLUME" + $AWSCLI ec2 delete-volume --volume-id $VOLUME + fi + done +fi diff --git a/templates/cluster/aws-hosted-cp/Chart.yaml b/templates/cluster/aws-hosted-cp/Chart.yaml index ca357f875..23fd28516 100644 --- a/templates/cluster/aws-hosted-cp/Chart.yaml +++ b/templates/cluster/aws-hosted-cp/Chart.yaml @@ -1,18 +1,18 @@ apiVersion: v2 name: aws-hosted-cp -description: | +description: | An HMC template to deploy a k8s cluster on AWS with control plane components within the management cluster. type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.2 +version: 0.1.3 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "1.30.2+k0s.0" +appVersion: '1.30.4+k0s.0' annotations: hmc.mirantis.com/infrastructure-providers: aws hmc.mirantis.com/controlplane-providers: k0smotron diff --git a/templates/cluster/aws-hosted-cp/templates/awscluster.yaml b/templates/cluster/aws-hosted-cp/templates/awscluster.yaml index c6dc896fc..72a12a1f1 100644 --- a/templates/cluster/aws-hosted-cp/templates/awscluster.yaml +++ b/templates/cluster/aws-hosted-cp/templates/awscluster.yaml @@ -4,6 +4,7 @@ metadata: name: {{ include "cluster.name" . 
}} annotations: cluster.x-k8s.io/managed-by: k0smotron + aws.cluster.x-k8s.io/external-resource-gc: "true" spec: region: {{ .Values.region }} # identityRef: diff --git a/templates/cluster/aws-hosted-cp/templates/k0smotroncontrolplane.yaml b/templates/cluster/aws-hosted-cp/templates/k0smotroncontrolplane.yaml index 3a038baa9..5fb372752 100644 --- a/templates/cluster/aws-hosted-cp/templates/k0smotroncontrolplane.yaml +++ b/templates/cluster/aws-hosted-cp/templates/k0smotroncontrolplane.yaml @@ -46,7 +46,8 @@ spec: - --cluster-name={{ include "cluster.name" . }} # Removing the default `node-role.kubernetes.io/control-plane` node selector # TODO: it does not work - # nodeSelector: "" + nodeSelector: + node-role.kubernetes.io/control-plane: null - name: aws-ebs-csi-driver namespace: kube-system chartname: aws-ebs-csi-driver/aws-ebs-csi-driver diff --git a/templates/cluster/aws-hosted-cp/values.yaml b/templates/cluster/aws-hosted-cp/values.yaml index a9491e27f..6103e8556 100644 --- a/templates/cluster/aws-hosted-cp/values.yaml +++ b/templates/cluster/aws-hosted-cp/values.yaml @@ -4,33 +4,33 @@ workersNumber: 2 clusterNetwork: pods: cidrBlocks: - - "10.244.0.0/16" + - '10.244.0.0/16' services: cidrBlocks: - - "10.96.0.0/12" + - '10.96.0.0/12' # AWS cluster parameters -vpcID: "" -region: "" -sshKeyName: "" +vpcID: '' +region: '' +sshKeyName: '' publicIP: false subnets: - - id: "" - availabilityZone: "" + - id: '' + availabilityZone: '' bastion: enabled: false disableIngressRules: false allowedCIDRBlocks: [] instanceType: t2.micro - ami: "" + ami: '' # AWS machines parameters -amiID: "" +amiID: '' imageLookup: - format: "amzn2-ami-hvm*-gp2" - org: "137112412989" - baseOS: "" + format: 'amzn2-ami-hvm*-gp2' + org: '137112412989' + baseOS: '' iamInstanceProfile: control-plane.cluster-api-provider-aws.sigs.k8s.io -instanceType: "" +instanceType: '' securityGroupIDs: [] rootVolumeSize: 8 @@ -43,4 +43,4 @@ k0smotron: # K0s parameters k0s: - version: v1.30.2+k0s.0 + version: 
v1.30.4+k0s.0 diff --git a/templates/cluster/aws-standalone-cp/Chart.yaml b/templates/cluster/aws-standalone-cp/Chart.yaml index 3bb77479d..c6b5f6790 100644 --- a/templates/cluster/aws-standalone-cp/Chart.yaml +++ b/templates/cluster/aws-standalone-cp/Chart.yaml @@ -6,12 +6,12 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.2 +version: 0.1.3 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "1.30.2+k0s.0" +appVersion: '1.30.4+k0s.0' annotations: hmc.mirantis.com/infrastructure-providers: aws hmc.mirantis.com/controlplane-providers: k0s diff --git a/templates/cluster/aws-standalone-cp/templates/awscluster.yaml b/templates/cluster/aws-standalone-cp/templates/awscluster.yaml index f797a8c20..c199c7227 100644 --- a/templates/cluster/aws-standalone-cp/templates/awscluster.yaml +++ b/templates/cluster/aws-standalone-cp/templates/awscluster.yaml @@ -2,6 +2,8 @@ apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 kind: AWSCluster metadata: name: {{ include "cluster.name" . 
}} + annotations: + aws.cluster.x-k8s.io/external-resource-gc: "true" spec: region: {{ .Values.region }} # identityRef: diff --git a/templates/cluster/aws-standalone-cp/values.yaml b/templates/cluster/aws-standalone-cp/values.yaml index 741228064..bcd816604 100644 --- a/templates/cluster/aws-standalone-cp/values.yaml +++ b/templates/cluster/aws-standalone-cp/values.yaml @@ -5,46 +5,46 @@ workersNumber: 2 clusterNetwork: pods: cidrBlocks: - - "10.244.0.0/16" + - '10.244.0.0/16' services: cidrBlocks: - - "10.96.0.0/12" + - '10.96.0.0/12' # AWS cluster parameters -region: "" -sshKeyName: "" +region: '' +sshKeyName: '' publicIP: false bastion: enabled: false disableIngressRules: false allowedCIDRBlocks: [] instanceType: t2.micro - ami: "" + ami: '' # AWS machines parameters controlPlane: - amiID: "" + amiID: '' iamInstanceProfile: control-plane.cluster-api-provider-aws.sigs.k8s.io - instanceType: "" + instanceType: '' rootVolumeSize: 8 imageLookup: - format: "amzn2-ami-hvm*-gp2" - org: "137112412989" - baseOS: "" + format: 'amzn2-ami-hvm*-gp2' + org: '137112412989' + baseOS: '' worker: - amiID: "" + amiID: '' iamInstanceProfile: control-plane.cluster-api-provider-aws.sigs.k8s.io - instanceType: "" + instanceType: '' rootVolumeSize: 8 imageLookup: - format: "amzn2-ami-hvm*-gp2" - org: "137112412989" - baseOS: "" + format: 'amzn2-ami-hvm*-gp2' + org: '137112412989' + baseOS: '' # K0s parameters k0s: - version: v1.30.2+k0s.0 + version: v1.30.4+k0s.0 -# Optionally install applications defined under +# Optionally install applications defined under # templates/beachheadservices into target cluster installBeachHeadServices: false diff --git a/templates/provider/cluster-api-provider-aws/Chart.yaml b/templates/provider/cluster-api-provider-aws/Chart.yaml index 022a29335..2fd9af300 100644 --- a/templates/provider/cluster-api-provider-aws/Chart.yaml +++ b/templates/provider/cluster-api-provider-aws/Chart.yaml @@ -13,11 +13,11 @@ type: application # This is the chart version. 
This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.1 +version: 0.1.2 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "2.6.1" +appVersion: '2.6.1' annotations: hmc.mirantis.com/infrastructure-providers: aws diff --git a/templates/provider/cluster-api-provider-aws/templates/provider.yaml b/templates/provider/cluster-api-provider-aws/templates/provider.yaml index d0e0b14cb..605bd141d 100644 --- a/templates/provider/cluster-api-provider-aws/templates/provider.yaml +++ b/templates/provider/cluster-api-provider-aws/templates/provider.yaml @@ -9,3 +9,6 @@ spec: name: {{ .Values.configSecret.name }} namespace: {{ .Values.configSecret.namespace | default .Release.Namespace | trunc 63 }} {{- end }} + manager: + featureGates: + ExternalResourceGC: true diff --git a/templates/provider/hmc-templates/files/templates/aws-hosted-cp.yaml b/templates/provider/hmc-templates/files/templates/aws-hosted-cp.yaml index 34254aacb..d2aa2b417 100644 --- a/templates/provider/hmc-templates/files/templates/aws-hosted-cp.yaml +++ b/templates/provider/hmc-templates/files/templates/aws-hosted-cp.yaml @@ -5,4 +5,4 @@ metadata: spec: helm: chartName: aws-hosted-cp - chartVersion: 0.1.2 + chartVersion: 0.1.3 diff --git a/templates/provider/hmc-templates/files/templates/aws-standalone-cp.yaml b/templates/provider/hmc-templates/files/templates/aws-standalone-cp.yaml index 5f6d1b55d..d26c386e3 100644 --- a/templates/provider/hmc-templates/files/templates/aws-standalone-cp.yaml +++ b/templates/provider/hmc-templates/files/templates/aws-standalone-cp.yaml @@ -5,4 
+5,4 @@ metadata: spec: helm: chartName: aws-standalone-cp - chartVersion: 0.1.2 + chartVersion: 0.1.3 diff --git a/templates/provider/hmc-templates/files/templates/cluster-api-provider-aws.yaml b/templates/provider/hmc-templates/files/templates/cluster-api-provider-aws.yaml index 6e3bdd67b..df4de06c4 100644 --- a/templates/provider/hmc-templates/files/templates/cluster-api-provider-aws.yaml +++ b/templates/provider/hmc-templates/files/templates/cluster-api-provider-aws.yaml @@ -5,4 +5,4 @@ metadata: spec: helm: chartName: cluster-api-provider-aws - chartVersion: 0.1.1 + chartVersion: 0.1.2 diff --git a/templates/provider/hmc/values.yaml b/templates/provider/hmc/values.yaml index 064f4c3f1..13829c117 100644 --- a/templates/provider/hmc/values.yaml +++ b/templates/provider/hmc/values.yaml @@ -1,15 +1,14 @@ -nameOverride: "" -fullnameOverride: "" +nameOverride: '' +fullnameOverride: '' admissionWebhook: enabled: false port: 9443 - certDir: "/tmp/k8s-webhook-server/serving-certs/" + certDir: '/tmp/k8s-webhook-server/serving-certs/' controller: - defaultRegistryURL: "oci://ghcr.io/mirantis/hmc/charts" - defaultRepoType: "oci" - registryCredsSecret: "" + defaultRegistryURL: 'oci://ghcr.io/mirantis/hmc/charts' + registryCredsSecret: '' insecureRegistry: false createManagement: true createTemplates: true diff --git a/test/e2e/controller.go b/test/e2e/controller.go new file mode 100644 index 000000000..8b8bdf244 --- /dev/null +++ b/test/e2e/controller.go @@ -0,0 +1,86 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package e2e + +import ( + "context" + "fmt" + "strings" + + "github.com/Mirantis/hmc/test/kubeclient" + "github.com/Mirantis/hmc/test/managedcluster" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + hmcControllerLabel = "app.kubernetes.io/name=hmc" +) + +// verifyControllersUp validates that controllers for the given providers list +// are running and ready. Optionally specify providers to check for rather than +// waiting for all providers to be ready. +func verifyControllersUp(kc *kubeclient.KubeClient, providers ...managedcluster.ProviderType) error { + if err := validateController(kc, hmcControllerLabel, "hmc-controller-manager"); err != nil { + return err + } + + if len(providers) == 0 { + providers = []managedcluster.ProviderType{ + managedcluster.ProviderCAPI, + managedcluster.ProviderAWS, + managedcluster.ProviderAzure, + } + } + + for _, provider := range providers { + // Ensure the provider's controller deployment exists and is ready. + if err := validateController(kc, managedcluster.GetProviderLabel(provider), string(provider)); err != nil { + return err + } + } + + return nil +} + +func validateController(kc *kubeclient.KubeClient, labelSelector string, name string) error { + deployList, err := kc.Client.AppsV1().Deployments(kc.Namespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return fmt.Errorf("failed to list %s controller deployments: %w", name, err) + } + + if len(deployList.Items) < 1 { + return fmt.Errorf("expected at least 1 %s controller deployment, got %d", + name, len(deployList.Items)) + } + + deployment := deployList.Items[0] + + // Ensure the deployment is not being deleted. 
+ if deployment.DeletionTimestamp != nil { + return fmt.Errorf("controller deployment: %s deletion timestamp should be nil, got: %v", + deployment.Name, deployment.DeletionTimestamp) + } + // Ensure the deployment is running and has the expected name. + if !strings.Contains(deployment.Name, "controller-manager") { + return fmt.Errorf("controller deployment name %s does not contain 'controller-manager'", deployment.Name) + } + if deployment.Status.ReadyReplicas < 1 { + return fmt.Errorf("controller deployment: %s does not yet have any ReadyReplicas", deployment.Name) + } + + return nil +} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 0d3b748d3..ba04eb939 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -18,6 +18,7 @@ import ( "bufio" "context" "fmt" + "net/url" "os" "os/exec" "path/filepath" @@ -32,12 +33,12 @@ import ( "github.com/Mirantis/hmc/test/kubeclient" "github.com/Mirantis/hmc/test/managedcluster" + "github.com/Mirantis/hmc/test/managedcluster/aws" "github.com/Mirantis/hmc/test/utils" ) const ( - namespace = "hmc-system" - hmcControllerLabel = "app.kubernetes.io/name=hmc" + namespace = "hmc-system" ) var _ = Describe("controller", Ordered, func() { @@ -49,151 +50,202 @@ var _ = Describe("controller", Ordered, func() { }) AfterAll(func() { - By("removing the controller-manager") - cmd := exec.Command("make", "test-destroy") - _, err := utils.Run(cmd) - Expect(err).NotTo(HaveOccurred()) + if !noCleanup() { + By("removing the controller-manager") + cmd := exec.Command("make", "test-destroy") + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) + } }) Context("Operator", func() { It("should run successfully", func() { - kc, err := kubeclient.NewFromLocal(namespace) - ExpectWithOffset(1, err).NotTo(HaveOccurred()) - ExpectWithOffset(1, kc.CreateAWSCredentialsKubeSecret(context.Background())).To(Succeed()) + kc := kubeclient.NewFromLocal(namespace) + aws.CreateCredentialSecret(context.Background(), kc) By("validating that the 
hmc-controller and capi provider controllers are running") - verifyControllersUp := func() error { - if err := verifyControllerUp(kc, hmcControllerLabel, "hmc-controller-manager"); err != nil { - return err - } - - for _, provider := range []managedcluster.ProviderType{ - managedcluster.ProviderCAPI, - managedcluster.ProviderAWS, - managedcluster.ProviderAzure, - } { - // Ensure only one controller pod is running. - if err := verifyControllerUp(kc, managedcluster.GetProviderLabel(provider), string(provider)); err != nil { - return err - } - } - - return nil - } Eventually(func() error { - err := verifyControllersUp() + err := verifyControllersUp(kc) if err != nil { - _, _ = fmt.Fprintf(GinkgoWriter, "Controller pod validation failed: %v\n", err) + _, _ = fmt.Fprintf(GinkgoWriter, "Controller validation failed: %v\n", err) return err } - return nil }).WithTimeout(15 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) }) }) - Context("AWS Templates", func() { + Describe("AWS Templates", Label("provider"), func() { var ( - kc *kubeclient.KubeClient - deleteFunc func() error - clusterName string - err error + kc *kubeclient.KubeClient + standaloneClient *kubeclient.KubeClient + standaloneDeleteFunc func() error + hostedDeleteFunc func() error + kubecfgDeleteFunc func() error + clusterName string ) BeforeAll(func() { By("ensuring AWS credentials are set") - kc, err = kubeclient.NewFromLocal(namespace) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) - ExpectWithOffset(2, kc.CreateAWSCredentialsKubeSecret(context.Background())).To(Succeed()) + kc = kubeclient.NewFromLocal(namespace) + aws.CreateCredentialSecret(context.Background(), kc) }) AfterEach(func() { // If we failed collect logs from each of the affiliated controllers // as well as the output of clusterctl to store as artifacts. 
- if CurrentSpecReport().Failed() { + if CurrentSpecReport().Failed() && !noCleanup() { By("collecting failure logs from controllers") - collectLogArtifacts(kc, clusterName, managedcluster.ProviderAWS, managedcluster.ProviderCAPI) - } + if kc != nil { + collectLogArtifacts(kc, clusterName, managedcluster.ProviderAWS, managedcluster.ProviderCAPI) + } + if standaloneClient != nil { + collectLogArtifacts(standaloneClient, clusterName, managedcluster.ProviderAWS, managedcluster.ProviderCAPI) + } - // Delete the deployments if they were created. - if deleteFunc != nil { - By("deleting the deployment") - err = deleteFunc() - Expect(err).NotTo(HaveOccurred()) + By("deleting resources after failure") + for _, deleteFunc := range []func() error{ + kubecfgDeleteFunc, + hostedDeleteFunc, + standaloneDeleteFunc, + } { + if deleteFunc != nil { + err := deleteFunc() + Expect(err).NotTo(HaveOccurred()) + } + } } + }) - // Purge the AWS resources, the AfterAll for the controller will - // clean up the management cluster. - By("nuking remaining AWS resources") - err = os.Setenv("CLUSTER_NAME", clusterName) - Expect(err).NotTo(HaveOccurred()) - cmd := exec.Command("make", "dev-aws-nuke") + It("should work with an AWS provider", func() { + // Deploy a standalone cluster and verify it is running/ready. + // Deploy standalone with an xlarge instance since it will also be + // hosting the hosted cluster. 
+ GinkgoT().Setenv(managedcluster.EnvVarAWSInstanceType, "t3.xlarge") + GinkgoT().Setenv(managedcluster.EnvVarInstallBeachHeadServices, "false") + + templateBy(managedcluster.TemplateAWSStandaloneCP, "creating a ManagedCluster") + sd := managedcluster.GetUnstructured(managedcluster.ProviderAWS, managedcluster.TemplateAWSStandaloneCP) + clusterName = sd.GetName() + + standaloneDeleteFunc = kc.CreateManagedCluster(context.Background(), sd) + + templateBy(managedcluster.TemplateAWSStandaloneCP, "waiting for infrastructure to deploy successfully") + resourcesToValidate := managedcluster.NewDeployedValidation() + Eventually(func() error { + return managedcluster.VerifyProviderDeployed( + context.Background(), kc, clusterName, + managedcluster.TemplateAWSStandaloneCP, resourcesToValidate, + ) + }).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + + templateBy(managedcluster.TemplateAWSHostedCP, "installing controller and templates on standalone cluster") + + // Download the KUBECONFIG for the standalone cluster and load it + // so we can call Make targets against this cluster. + // TODO: Ideally we shouldn't use Make here and should just convert + // these Make targets into Go code, but this will require a + // helmclient. 
+ var kubeCfgPath string + kubeCfgPath, kubecfgDeleteFunc = kc.WriteKubeconfig(context.Background(), clusterName) + + GinkgoT().Setenv("KUBECONFIG", kubeCfgPath) + cmd := exec.Command("make", "dev-deploy") _, err := utils.Run(cmd) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) - }) + Expect(err).NotTo(HaveOccurred()) + cmd = exec.Command("make", "dev-templates") + _, err = utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) + Expect(os.Unsetenv("KUBECONFIG")).To(Succeed()) - for _, template := range []managedcluster.Template{ - managedcluster.TemplateAWSStandaloneCP, - managedcluster.TemplateAWSHostedCP, - } { - It(fmt.Sprintf("should work with an AWS provider and %s template", template), func() { - if template == managedcluster.TemplateAWSHostedCP { - // TODO: Create AWS resources for hosted control plane. - Skip("AWS hosted control plane not yet implemented") - } + // Ensure AWS credentials are set in the standalone cluster. + standaloneClient = kc.NewFromCluster(context.Background(), namespace, clusterName) + aws.CreateCredentialSecret(context.Background(), standaloneClient) - By("creating a Deployment") - d := managedcluster.GetUnstructured(managedcluster.ProviderAWS, template) - clusterName = d.GetName() + templateBy(managedcluster.TemplateAWSHostedCP, "validating that the controller is ready") + Eventually(func() error { + err := verifyControllersUp(standaloneClient, managedcluster.ProviderCAPI, managedcluster.ProviderAWS) + if err != nil { + _, _ = fmt.Fprintf( + GinkgoWriter, "[%s] controller validation failed: %v\n", + string(managedcluster.TemplateAWSHostedCP), err) + return err + } + return nil + }).WithTimeout(15 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) - deleteFunc, err = kc.CreateManagedCluster(context.Background(), d) - Expect(err).NotTo(HaveOccurred()) + // Populate the environment variables required for the hosted + // cluster. 
+ aws.PopulateHostedTemplateVars(context.Background(), kc) - By("waiting for infrastructure providers to deploy successfully") - Eventually(func() error { - return managedcluster.VerifyProviderDeployed(context.Background(), kc, clusterName) - }).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + templateBy(managedcluster.TemplateAWSHostedCP, "creating a ManagedCluster") + hd := managedcluster.GetUnstructured(managedcluster.ProviderAWS, managedcluster.TemplateAWSHostedCP) + hdName := hd.GetName() - By("verify the deployment deletes successfully") - err = deleteFunc() - Expect(err).NotTo(HaveOccurred()) - Eventually(func() error { - return managedcluster.VerifyProviderDeleted(context.Background(), kc, clusterName) - }).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) - }) - } - }) -}) + // Deploy the hosted cluster on top of the standalone cluster. + hostedDeleteFunc = standaloneClient.CreateManagedCluster(context.Background(), hd) -func verifyControllerUp(kc *kubeclient.KubeClient, labelSelector string, name string) error { - deployList, err := kc.Client.AppsV1().Deployments(kc.Namespace).List(context.Background(), metav1.ListOptions{ - LabelSelector: labelSelector, - }) - if err != nil { - return fmt.Errorf("failed to list %s controller deployments: %w", name, err) - } + // Patch the AWSCluster resource as Ready, see: + // https://docs.k0smotron.io/stable/capi-aws/#prepare-the-aws-infra-provider + // Use Eventually as the AWSCluster might not be available + // immediately. 
+ templateBy(managedcluster.TemplateAWSHostedCP, "Patching AWSCluster to ready") + Eventually(func() error { + if err := aws.PatchAWSClusterReady(context.Background(), standaloneClient, hd.GetName()); err != nil { + _, _ = fmt.Fprintf(GinkgoWriter, "failed to patch AWSCluster to ready: %v, retrying...\n", err) + return err + } + _, _ = fmt.Fprintf(GinkgoWriter, "Patch succeeded\n") + return nil + }).WithTimeout(time.Minute).WithPolling(5 * time.Second).Should(Succeed()) - if len(deployList.Items) < 1 { - return fmt.Errorf("expected at least 1 %s controller deployment, got %d", - name, len(deployList.Items)) - } + // Verify the hosted cluster is running/ready. + templateBy(managedcluster.TemplateAWSHostedCP, "waiting for infrastructure to deploy successfully") + resourcesToValidate = managedcluster.NewDeployedValidation() + Eventually(func() error { + return managedcluster.VerifyProviderDeployed( + context.Background(), standaloneClient, hdName, + managedcluster.TemplateAWSHostedCP, resourcesToValidate, + ) + }).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + + // Delete the hosted ManagedCluster and verify it is removed. + templateBy(managedcluster.TemplateAWSHostedCP, "deleting the ManagedCluster") + err = hostedDeleteFunc() + Expect(err).NotTo(HaveOccurred()) - deployment := deployList.Items[0] + resourcesToValidate = managedcluster.NewDeletionValidation() + Eventually(func() error { + return managedcluster.VerifyProviderDeleted( + context.Background(), standaloneClient, hdName, + managedcluster.TemplateAWSHostedCP, resourcesToValidate, + ) + }).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + + // Now delete the standalone ManagedCluster and verify it is + // removed, it is deleted last since it is the basis for the hosted + // cluster. 
+ templateBy(managedcluster.TemplateAWSStandaloneCP, "deleting the ManagedCluster") + err = standaloneDeleteFunc() + Expect(err).NotTo(HaveOccurred()) - // Ensure the deployment is not being deleted. - if deployment.DeletionTimestamp != nil { - return fmt.Errorf("controller pod: %s deletion timestamp should be nil, got: %v", - deployment.Name, deployment.DeletionTimestamp) - } - // Ensure the deployment is running and has the expected name. - if !strings.Contains(deployment.Name, "controller-manager") { - return fmt.Errorf("controller deployment name %s does not contain 'controller-manager'", deployment.Name) - } - if deployment.Status.ReadyReplicas < 1 { - return fmt.Errorf("controller deployment: %s does not yet have any ReadyReplicas", deployment.Name) - } + resourcesToValidate = managedcluster.NewDeletionValidation() + Eventually(func() error { + return managedcluster.VerifyProviderDeleted( + context.Background(), kc, clusterName, + managedcluster.TemplateAWSStandaloneCP, resourcesToValidate, + ) + }).WithTimeout(10 * time.Minute).WithPolling(10 * + time.Second).Should(Succeed()) + }) + }) +}) - return nil +// templateBy wraps a Ginkgo By with a block describing the template being +// tested. 
+func templateBy(t managedcluster.Template, description string) { + GinkgoHelper() + By(fmt.Sprintf("[%s] %s", t, description)) } // collectLogArtfiacts collects log output from each the HMC controller, @@ -205,6 +257,14 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider filterLabels := []string{hmcControllerLabel} + var host string + hostURL, err := url.Parse(kc.Config.Host) + if err != nil { + utils.WarnError(fmt.Errorf("failed to parse host from kubeconfig: %w", err)) + } else { + host = strings.ReplaceAll(hostURL.Host, ":", "_") + } + for _, providerType := range providerTypes { filterLabels = append(filterLabels, managedcluster.GetProviderLabel(providerType)) } @@ -225,7 +285,7 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider } defer podLogs.Close() //nolint:errcheck - output, err := os.Create(fmt.Sprintf("./test/e2e/%s.log", pod.Name)) + output, err := os.Create(fmt.Sprintf("./test/e2e/%s.log", host+"-"+pod.Name)) if err != nil { utils.WarnError(fmt.Errorf("failed to create log file for pod %s: %w", pod.Name, err)) continue @@ -248,8 +308,17 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider return } - err = os.WriteFile(filepath.Join("test/e2e", "clusterctl.log"), output, 0644) + err = os.WriteFile(filepath.Join("test/e2e", host+"-"+"clusterctl.log"), output, 0644) if err != nil { utils.WarnError(fmt.Errorf("failed to write clusterctl log: %w", err)) } } + +func noCleanup() bool { + noCleanup := os.Getenv(managedcluster.EnvVarNoCleanup) + if noCleanup != "" { + By(fmt.Sprintf("skipping After nodes as %s is set", managedcluster.EnvVarNoCleanup)) + } + + return noCleanup != "" +} diff --git a/test/kubeclient/kubeclient.go b/test/kubeclient/kubeclient.go index c1a4453c9..ee06b9102 100644 --- a/test/kubeclient/kubeclient.go +++ b/test/kubeclient/kubeclient.go @@ -18,12 +18,10 @@ import ( "context" "fmt" "os" - "os/exec" "path/filepath" - 
"github.com/Mirantis/hmc/test/utils" . "github.com/onsi/ginkgo/v2" - corev1 "k8s.io/api/core/v1" + . "github.com/onsi/gomega" apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -35,10 +33,6 @@ import ( "k8s.io/client-go/tools/clientcmd" ) -const ( - awsCredentialsSecretName = "aws-variables" -) - type KubeClient struct { Namespace string @@ -49,144 +43,126 @@ type KubeClient struct { // NewFromLocal creates a new instance of KubeClient from a given namespace // using the locally found kubeconfig. -func NewFromLocal(namespace string) (*KubeClient, error) { - configBytes, err := getLocalKubeConfig() - if err != nil { - return nil, fmt.Errorf("failed to get local kubeconfig: %w", err) - } - - return new(configBytes, namespace) +func NewFromLocal(namespace string) *KubeClient { + GinkgoHelper() + return new(getLocalKubeConfig(), namespace) } // NewFromCluster creates a new KubeClient using the kubeconfig stored in the // secret affiliated with the given clusterName. Since it relies on fetching // the kubeconfig from secret it needs an existing kubeclient. -func (kc *KubeClient) NewFromCluster(ctx context.Context, namespace, clusterName string) (*KubeClient, error) { - secret, err := kc.Client.CoreV1().Secrets(kc.Namespace).Get(ctx, clusterName+"-kubeconfig", metav1.GetOptions{}) - if err != nil { - return nil, fmt.Errorf("failed to get cluster: %q kubeconfig secret: %w", clusterName, err) +func (kc *KubeClient) NewFromCluster(ctx context.Context, namespace, clusterName string) *KubeClient { + GinkgoHelper() + return new(kc.getKubeconfigSecretData(ctx, clusterName), namespace) +} + +// WriteKubeconfig writes the kubeconfig for the given clusterName to the +// test/e2e directory returning the path to the file and a function to delete +// it later. 
+func (kc *KubeClient) WriteKubeconfig(ctx context.Context, clusterName string) (string, func() error) { + GinkgoHelper() + + secretData := kc.getKubeconfigSecretData(ctx, clusterName) + + dir, err := os.Getwd() + Expect(err).NotTo(HaveOccurred()) + + path := filepath.Join(dir, clusterName+"-kubeconfig") + + Expect( + os.WriteFile(path, secretData, 0644)). + To(Succeed()) + + deleteFunc := func() error { + return os.Remove(path) } + return path, deleteFunc +} + +func (kc *KubeClient) getKubeconfigSecretData(ctx context.Context, clusterName string) []byte { + GinkgoHelper() + + secret, err := kc.Client.CoreV1().Secrets(kc.Namespace).Get(ctx, clusterName+"-kubeconfig", metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred(), "failed to get cluster: %q kubeconfig secret", clusterName) + secretData, ok := secret.Data["value"] - if !ok { - return nil, fmt.Errorf("kubeconfig secret %q has no 'value' key", clusterName) - } + Expect(ok).To(BeTrue(), "kubeconfig secret %q has no 'value' key", clusterName) - return new(secretData, namespace) + return secretData } // getLocalKubeConfig returns the kubeconfig file content. -func getLocalKubeConfig() ([]byte, error) { +func getLocalKubeConfig() []byte { + GinkgoHelper() + // Use the KUBECONFIG environment variable if it is set, otherwise use the // default path. kubeConfig, ok := os.LookupEnv("KUBECONFIG") if !ok { homeDir, err := os.UserHomeDir() - if err != nil { - return nil, fmt.Errorf("failed to get user home directory: %w", err) - } + Expect(err).NotTo(HaveOccurred(), "failed to get user home directory") kubeConfig = filepath.Join(homeDir, ".kube", "config") } configBytes, err := os.ReadFile(kubeConfig) - if err != nil { - return nil, fmt.Errorf("failed to read %q: %w", kubeConfig, err) - } + Expect(err).NotTo(HaveOccurred(), "failed to read %q", kubeConfig) - return configBytes, nil + return configBytes } // new creates a new instance of KubeClient from a given namespace using // the local kubeconfig. 
-func new(configBytes []byte, namespace string) (*KubeClient, error) { +func new(configBytes []byte, namespace string) *KubeClient { + GinkgoHelper() + config, err := clientcmd.RESTConfigFromKubeConfig(configBytes) - if err != nil { - return nil, fmt.Errorf("failed to parse kubeconfig: %w", err) - } + Expect(err).NotTo(HaveOccurred(), "failed to parse kubeconfig") clientSet, err := kubernetes.NewForConfig(config) - if err != nil { - return nil, fmt.Errorf("could not initialize kubernetes client: %w", err) - } + Expect(err).NotTo(HaveOccurred(), "failed to initialize kubernetes client") extendedClientSet, err := apiextensionsclientset.NewForConfig(config) - if err != nil { - return nil, fmt.Errorf("failed to initialize apiextensions clientset: %w", err) - } + Expect(err).NotTo(HaveOccurred(), "failed to initialize apiextensions clientset") return &KubeClient{ Namespace: namespace, Client: clientSet, ExtendedClient: extendedClientSet, Config: config, - }, nil -} - -// CreateAWSCredentialsKubeSecret uses clusterawsadm to encode existing AWS -// credentials and create a secret in the given namespace if one does not -// already exist. 
-func (kc *KubeClient) CreateAWSCredentialsKubeSecret(ctx context.Context) error { - _, err := kc.Client.CoreV1().Secrets(kc.Namespace).Get(ctx, awsCredentialsSecretName, metav1.GetOptions{}) - if !apierrors.IsNotFound(err) { - return nil - } - - cmd := exec.Command("./bin/clusterawsadm", "bootstrap", "credentials", "encode-as-profile") - output, err := utils.Run(cmd) - if err != nil { - return fmt.Errorf("failed to encode AWS credentials with clusterawsadm: %w", err) } - - _, err = kc.Client.CoreV1().Secrets(kc.Namespace).Create(ctx, &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: awsCredentialsSecretName, - }, - Data: map[string][]byte{ - "AWS_B64ENCODED_CREDENTIALS": output, - }, - Type: corev1.SecretTypeOpaque, - }, metav1.CreateOptions{}) - if err != nil { - return fmt.Errorf("failed to create AWS credentials secret: %w", err) - } - - return nil } // GetDynamicClient returns a dynamic client for the given GroupVersionResource. -func (kc *KubeClient) GetDynamicClient(gvr schema.GroupVersionResource) (dynamic.ResourceInterface, error) { +func (kc *KubeClient) GetDynamicClient(gvr schema.GroupVersionResource) dynamic.ResourceInterface { + GinkgoHelper() + client, err := dynamic.NewForConfig(kc.Config) - if err != nil { - return nil, fmt.Errorf("failed to create dynamic client: %w", err) - } + Expect(err).NotTo(HaveOccurred(), "failed to create dynamic client") - return client.Resource(gvr).Namespace(kc.Namespace), nil + return client.Resource(gvr).Namespace(kc.Namespace) } -// CreateDeployment creates a managedcluster.hmc.mirantis.com in the given +// CreateManagedCluster creates a managedcluster.hmc.mirantis.com in the given // namespace and returns a DeleteFunc to clean up the deployment. // The DeleteFunc is a no-op if the deployment has already been deleted. 
func (kc *KubeClient) CreateManagedCluster( - ctx context.Context, managedcluster *unstructured.Unstructured) (func() error, error) { - kind := managedcluster.GetKind() + ctx context.Context, managedcluster *unstructured.Unstructured) func() error { + GinkgoHelper() - if kind != "ManagedCluster" { - return nil, fmt.Errorf("expected kind ManagedCluster, got: %s", kind) - } + kind := managedcluster.GetKind() + Expect(kind).To(Equal("ManagedCluster")) - client, err := kc.GetDynamicClient(schema.GroupVersionResource{ + client := kc.GetDynamicClient(schema.GroupVersionResource{ Group: "hmc.mirantis.com", Version: "v1alpha1", Resource: "managedclusters", }) - if err != nil { - return nil, fmt.Errorf("failed to get dynamic client: %w", err) - } - _, err = client.Create(ctx, managedcluster, metav1.CreateOptions{}) - if err != nil { - return nil, fmt.Errorf("failed to create Deployment: %w", err) + _, err := client.Create(ctx, managedcluster, metav1.CreateOptions{}) + if !apierrors.IsAlreadyExists(err) { + Expect(err).NotTo(HaveOccurred(), "failed to create %s", kind) } return func() error { @@ -195,7 +171,7 @@ func (kc *KubeClient) CreateManagedCluster( return nil } return err - }, nil + } } // GetCluster returns a Cluster resource by name. @@ -206,14 +182,11 @@ func (kc *KubeClient) GetCluster(ctx context.Context, clusterName string) (*unst Resource: "clusters", } - client, err := kc.GetDynamicClient(gvr) - if err != nil { - Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err)) - } + client := kc.GetDynamicClient(gvr) cluster, err := client.Get(ctx, clusterName, metav1.GetOptions{}) if err != nil { - return nil, fmt.Errorf("failed to get %s %s: %w", gvr.Resource, clusterName, err) + return nil, fmt.Errorf("failed to get %s %s: %w", gvr.Resource, clusterName, err) } return cluster, nil @@ -223,16 +196,13 @@ func (kc *KubeClient) GetCluster(ctx context.Context, clusterName string) (*unst // affiliated with the given clusterName. 
func (kc *KubeClient) listResource( ctx context.Context, gvr schema.GroupVersionResource, clusterName string) ([]unstructured.Unstructured, error) { - client, err := kc.GetDynamicClient(gvr) - if err != nil { - Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err)) - } + client := kc.GetDynamicClient(gvr) resources, err := client.List(ctx, metav1.ListOptions{ LabelSelector: "cluster.x-k8s.io/cluster-name=" + clusterName, }) if err != nil { - return nil, fmt.Errorf("failed to list %s: %w", gvr.Resource, err) + return nil, fmt.Errorf("failed to list %s: %w", gvr.Resource, err) } return resources.Items, nil @@ -240,6 +210,8 @@ func (kc *KubeClient) listResource( // ListMachines returns a list of Machine resources for the given cluster. func (kc *KubeClient) ListMachines(ctx context.Context, clusterName string) ([]unstructured.Unstructured, error) { + GinkgoHelper() + return kc.listResource(ctx, schema.GroupVersionResource{ Group: "cluster.x-k8s.io", Version: "v1beta1", @@ -251,6 +223,8 @@ func (kc *KubeClient) ListMachines(ctx context.Context, clusterName string) ([]u // given cluster. 
func (kc *KubeClient) ListMachineDeployments( ctx context.Context, clusterName string) ([]unstructured.Unstructured, error) { + GinkgoHelper() + return kc.listResource(ctx, schema.GroupVersionResource{ Group: "cluster.x-k8s.io", Version: "v1beta1", @@ -260,6 +234,8 @@ func (kc *KubeClient) ListMachineDeployments( func (kc *KubeClient) ListK0sControlPlanes( ctx context.Context, clusterName string) ([]unstructured.Unstructured, error) { + GinkgoHelper() + return kc.listResource(ctx, schema.GroupVersionResource{ Group: "controlplane.cluster.x-k8s.io", Version: "v1beta1", diff --git a/test/managedcluster/aws/aws.go b/test/managedcluster/aws/aws.go new file mode 100644 index 000000000..11cd276ba --- /dev/null +++ b/test/managedcluster/aws/aws.go @@ -0,0 +1,137 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains specific helpers for testing a managed cluster +// that uses the AWS infrastructure provider. +package aws + +import ( + "context" + "encoding/json" + "os" + "os/exec" + + corev1 "k8s.io/api/core/v1" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/dynamic" + + "github.com/Mirantis/hmc/test/kubeclient" + "github.com/Mirantis/hmc/test/managedcluster" + "github.com/Mirantis/hmc/test/utils" +) + +// CreateCredentialSecret uses clusterawsadm to encode existing AWS +// credentials and create a secret in the given namespace if one does not +// already exist. +func CreateCredentialSecret(ctx context.Context, kc *kubeclient.KubeClient) { + GinkgoHelper() + + _, err := kc.Client.CoreV1().Secrets(kc.Namespace). + Get(ctx, managedcluster.AWSCredentialsSecretName, metav1.GetOptions{}) + if !apierrors.IsNotFound(err) { + Expect(err).NotTo(HaveOccurred(), "failed to get AWS credentials secret") + return + } + + cmd := exec.Command("./bin/clusterawsadm", "bootstrap", "credentials", "encode-as-profile") + output, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "failed to encode AWS credentials with clusterawsadm") + + _, err = kc.Client.CoreV1().Secrets(kc.Namespace).Create(ctx, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: managedcluster.AWSCredentialsSecretName, + }, + Data: map[string][]byte{ + "AWS_B64ENCODED_CREDENTIALS": output, + }, + Type: corev1.SecretTypeOpaque, + }, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred(), "failed to create AWS credentials secret") +} + +// PopulateHostedTemplateVars populates the environment variables required for +// the AWS hosted CP template by querying the standalone CP cluster with the +// given kubeclient. 
+func PopulateHostedTemplateVars(ctx context.Context, kc *kubeclient.KubeClient) { + GinkgoHelper() + + c := getAWSClusterClient(kc) + awsCluster, err := c.Get(ctx, os.Getenv(managedcluster.EnvVarManagedClusterName), metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred(), "failed to get AWS cluster") + + vpcID, found, err := unstructured.NestedString(awsCluster.Object, "spec", "network", "vpc", "id") + Expect(err).NotTo(HaveOccurred(), "failed to get AWS cluster VPC ID") + Expect(found).To(BeTrue(), "AWS cluster has no VPC ID") + + subnets, found, err := unstructured.NestedSlice(awsCluster.Object, "spec", "network", "subnets") + Expect(err).NotTo(HaveOccurred(), "failed to get AWS cluster subnets") + Expect(found && len(subnets) > 0).To(BeTrue(), "AWS cluster has no subnets") + + subnet, ok := subnets[0].(map[string]interface{}) + Expect(ok).To(BeTrue(), "failed to cast subnet to map") + + subnetID, ok := subnet["resourceID"].(string) + Expect(ok).To(BeTrue(), "failed to cast subnet ID to string") + + subnetAZ, ok := subnet["availabilityZone"].(string) + Expect(ok).To(BeTrue(), "failed to cast subnet availability zone to string") + + securityGroupID, found, err := unstructured.NestedString( + awsCluster.Object, "status", "networkStatus", "securityGroups", "node", "id") + Expect(err).NotTo(HaveOccurred(), "failed to get AWS cluster security group ID") + Expect(found).To(BeTrue(), "AWS cluster has no security group ID") + + GinkgoT().Setenv(managedcluster.EnvVarAWSVPCID, vpcID) + GinkgoT().Setenv(managedcluster.EnvVarAWSSubnetID, subnetID) + GinkgoT().Setenv(managedcluster.EnvVarAWSSubnetAvailabilityZone, subnetAZ) + GinkgoT().Setenv(managedcluster.EnvVarAWSSecurityGroupID, securityGroupID) +} + +func PatchAWSClusterReady(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + GinkgoHelper() + + c := getAWSClusterClient(kc) + + trueStatus := map[string]interface{}{ + "status": map[string]interface{}{ + "ready": true, + }, + } + + patchBytes, err := 
json.Marshal(trueStatus) + Expect(err).NotTo(HaveOccurred(), "failed to marshal patch bytes") + + _, err = c.Patch(ctx, clusterName, types.MergePatchType, + patchBytes, metav1.PatchOptions{}, "status") + if err != nil { + return err + } + + return nil +} + +func getAWSClusterClient(kc *kubeclient.KubeClient) dynamic.ResourceInterface { + return kc.GetDynamicClient(schema.GroupVersionResource{ + Group: "infrastructure.cluster.x-k8s.io", + Version: "v1beta2", + Resource: "awsclusters", + }) +} diff --git a/test/managedcluster/constants.go b/test/managedcluster/constants.go new file mode 100644 index 000000000..cd43527fb --- /dev/null +++ b/test/managedcluster/constants.go @@ -0,0 +1,37 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package managedcluster + +const ( + // Common + EnvVarManagedClusterName = "MANAGED_CLUSTER_NAME" + EnvVarHostedManagedClusterName = "HOSTED_MANAGED_CLUSTER_NAME" + EnvVarInstallBeachHeadServices = "INSTALL_BEACH_HEAD_SERVICES" + EnvVarControlPlaneNumber = "CONTROL_PLANE_NUMBER" + EnvVarWorkerNumber = "WORKER_NUMBER" + EnvVarNamespace = "NAMESPACE" + // EnvVarNoCleanup disables After* cleanup in provider specs to allow for + // debugging of test failures. 
+ EnvVarNoCleanup = "NO_CLEANUP" + + // AWS + EnvVarAWSVPCID = "AWS_VPC_ID" + EnvVarAWSSubnetID = "AWS_SUBNET_ID" + EnvVarAWSSubnetAvailabilityZone = "AWS_SUBNET_AVAILABILITY_ZONE" + EnvVarAWSInstanceType = "AWS_INSTANCE_TYPE" + EnvVarAWSSecurityGroupID = "AWS_SG_ID" + EnvVarPublicIP = "AWS_PUBLIC_IP" + AWSCredentialsSecretName = "aws-variables" +) diff --git a/test/managedcluster/managedcluster.go b/test/managedcluster/managedcluster.go index 28783ea5d..9e717bd62 100644 --- a/test/managedcluster/managedcluster.go +++ b/test/managedcluster/managedcluster.go @@ -59,18 +59,35 @@ func GetProviderLabel(provider ProviderType) string { func GetUnstructured(provider ProviderType, templateName Template) *unstructured.Unstructured { GinkgoHelper() - generatedName := uuid.New().String()[:8] + "-e2e-test" - _, _ = fmt.Fprintf(GinkgoWriter, "Generated cluster name: %q\n", generatedName) + generatedName := os.Getenv(EnvVarManagedClusterName) + if generatedName == "" { + generatedName = uuid.New().String()[:8] + "-e2e-test" + _, _ = fmt.Fprintf(GinkgoWriter, "Generated cluster name: %q\n", generatedName) + GinkgoT().Setenv(EnvVarManagedClusterName, generatedName) + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "Using configured cluster name: %q\n", generatedName) + } switch provider { case ProviderAWS: - Expect(os.Setenv("MANAGED_CLUSTER_NAME", generatedName)).NotTo(HaveOccurred()) - var managedClusterTemplateBytes []byte switch templateName { case TemplateAWSStandaloneCP: managedClusterTemplateBytes = awsStandaloneCPManagedClusterTemplateBytes case TemplateAWSHostedCP: + hostedName := generatedName + "-hosted" + + GinkgoT().Setenv(EnvVarHostedManagedClusterName, hostedName) + _, _ = fmt.Fprintf(GinkgoWriter, "Creating hosted ManagedCluster with name: %q\n", hostedName) + + // Validate environment vars that do not have defaults are populated. 
+ validateDeploymentVars([]string{ + EnvVarAWSVPCID, + EnvVarAWSSubnetID, + EnvVarAWSSubnetAvailabilityZone, + EnvVarAWSSecurityGroupID, + }) + managedClusterTemplateBytes = awsHostedCPManagedClusterTemplateBytes default: Fail(fmt.Sprintf("unsupported AWS template: %s", templateName)) @@ -91,3 +108,11 @@ func GetUnstructured(provider ProviderType, templateName Template) *unstructured return nil } + +func validateDeploymentVars(v []string) { + GinkgoHelper() + + for _, envVar := range v { + Expect(os.Getenv(envVar)).NotTo(BeEmpty(), envVar+" must be set") + } +} diff --git a/test/managedcluster/resources/aws-hosted-cp.yaml.tpl b/test/managedcluster/resources/aws-hosted-cp.yaml.tpl index 894bb6667..06a4cf4cc 100644 --- a/test/managedcluster/resources/aws-hosted-cp.yaml.tpl +++ b/test/managedcluster/resources/aws-hosted-cp.yaml.tpl @@ -1,16 +1,15 @@ apiVersion: hmc.mirantis.com/v1alpha1 kind: ManagedCluster metadata: - name: ${MANAGED_CLUSTER_NAME} + name: ${HOSTED_MANAGED_CLUSTER_NAME} spec: template: aws-hosted-cp config: vpcID: ${AWS_VPC_ID} region: ${AWS_REGION} - publicIP: ${PUBLIC_IP:=true} subnets: - id: ${AWS_SUBNET_ID} availabilityZone: ${AWS_SUBNET_AVAILABILITY_ZONE} - instanceType: ${INSTANCE_TYPE:=t3.medium} + instanceType: ${AWS_INSTANCE_TYPE:=t3.medium} securityGroupIDs: - ${AWS_SG_ID} diff --git a/test/managedcluster/resources/aws-standalone-cp.yaml.tpl b/test/managedcluster/resources/aws-standalone-cp.yaml.tpl index 7825a2833..0d107ca43 100644 --- a/test/managedcluster/resources/aws-standalone-cp.yaml.tpl +++ b/test/managedcluster/resources/aws-standalone-cp.yaml.tpl @@ -6,12 +6,13 @@ spec: template: aws-standalone-cp config: region: ${AWS_REGION} - publicIP: ${PUBLIC_IP:=true} + publicIP: ${AWS_PUBLIC_IP:=true} controlPlaneNumber: ${CONTROL_PLANE_NUMBER:=1} workersNumber: ${WORKERS_NUMBER:=1} controlPlane: - instanceType: ${INSTANCE_TYPE:=t3.small} + instanceType: ${AWS_INSTANCE_TYPE:=t3.small} worker: - instanceType: ${INSTANCE_TYPE:=t3.small} + 
instanceType: ${AWS_INSTANCE_TYPE:=t3.small} + installBeachHeadServices: ${INSTALL_BEACH_HEAD_SERVICES:=true} diff --git a/test/managedcluster/validate_deleted.go b/test/managedcluster/validate_deleted.go index dc5712a9f..5afdd9812 100644 --- a/test/managedcluster/validate_deleted.go +++ b/test/managedcluster/validate_deleted.go @@ -23,17 +23,21 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) -var deletionValidators = map[string]resourceValidationFunc{ - "clusters": validateClusterDeleted, - "machinedeployments": validateMachineDeploymentsDeleted, - "control-planes": validateK0sControlPlanesDeleted, +func NewDeletionValidation() map[string]resourceValidationFunc { + return map[string]resourceValidationFunc{ + "clusters": validateClusterDeleted, + "machinedeployments": validateMachineDeploymentsDeleted, + "control-planes": validateK0sControlPlanesDeleted, + } } // VerifyProviderDeleted is a provider-agnostic verification that checks // to ensure generic resources managed by the provider have been deleted. // It is intended to be used in conjunction with an Eventually block. -func VerifyProviderDeleted(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { - return verifyProviderAction(ctx, kc, clusterName, deletionValidators, +func VerifyProviderDeleted( + ctx context.Context, kc *kubeclient.KubeClient, clusterName string, + templateName Template, resourcesToValidate map[string]resourceValidationFunc) error { + return verifyProviderAction(ctx, kc, clusterName, templateName, resourcesToValidate, []string{"clusters", "machinedeployments", "control-planes"}) } @@ -52,7 +56,7 @@ func validateClusterDeleted(ctx context.Context, kc *kubeclient.KubeClient, clus // like this, we probably don't want to wait the full Eventually // for something like this, but we can't immediately fail the test // either. 
- return fmt.Errorf("cluster %q exists, but is not in 'Deleting' phase", clusterName) + return fmt.Errorf("cluster: %q exists, but is not in 'Deleting' phase", clusterName) } conditions, err := utils.GetConditionsFromUnstructured(cluster) diff --git a/test/managedcluster/validate_deployed.go b/test/managedcluster/validate_deployed.go index f6423fb2b..df0c99a14 100644 --- a/test/managedcluster/validate_deployed.go +++ b/test/managedcluster/validate_deployed.go @@ -34,19 +34,23 @@ import ( // resource. type resourceValidationFunc func(context.Context, *kubeclient.KubeClient, string) error -var resourceValidators = map[string]resourceValidationFunc{ - "clusters": validateCluster, - "machines": validateMachines, - "control-planes": validateK0sControlPlanes, - "csi-driver": validateCSIDriver, - "ccm": validateCCM, +func NewDeployedValidation() map[string]resourceValidationFunc { + return map[string]resourceValidationFunc{ + "clusters": validateCluster, + "machines": validateMachines, + "control-planes": validateK0sControlPlanes, + "csi-driver": validateCSIDriver, + "ccm": validateCCM, + } } // VerifyProviderDeployed is a provider-agnostic verification that checks // to ensure generic resources managed by the provider have been deleted. // It is intended to be used in conjunction with an Eventually block. -func VerifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { - return verifyProviderAction(ctx, kc, clusterName, resourceValidators, +func VerifyProviderDeployed( + ctx context.Context, kc *kubeclient.KubeClient, clusterName string, + templateName Template, resourceValidationMap map[string]resourceValidationFunc) error { + return verifyProviderAction(ctx, kc, clusterName, templateName, resourceValidationMap, []string{"clusters", "machines", "control-planes", "csi-driver", "ccm"}) } @@ -60,7 +64,8 @@ func VerifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clus // should be used to end the spec early. 
func verifyProviderAction( ctx context.Context, kc *kubeclient.KubeClient, clusterName string, - resourcesToValidate map[string]resourceValidationFunc, order []string) error { + templateName Template, resourcesToValidate map[string]resourceValidationFunc, + order []string) error { // Sequentially validate each resource type, only returning the first error // as to not move on to the next resource type until the first is resolved. // We use []string here since order is important. @@ -71,11 +76,11 @@ func verifyProviderAction( } if err := validator(ctx, kc, clusterName); err != nil { - _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation error: %v\n", name, err) + _, _ = fmt.Fprintf(GinkgoWriter, "[%s/%s] validation error: %v\n", templateName, name, err) return err } - _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation succeeded\n", name) + _, _ = fmt.Fprintf(GinkgoWriter, "[%s/%s] validation succeeded\n", templateName, name) delete(resourcesToValidate, name) } @@ -111,7 +116,7 @@ func validateCluster(ctx context.Context, kc *kubeclient.KubeClient, clusterName func validateMachines(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { machines, err := kc.ListMachines(ctx, clusterName) if err != nil { - return fmt.Errorf("failed to list machines: %w", err) + return err } for _, machine := range machines { @@ -130,7 +135,7 @@ func validateMachines(ctx context.Context, kc *kubeclient.KubeClient, clusterNam func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { controlPlanes, err := kc.ListK0sControlPlanes(ctx, clusterName) if err != nil { - return fmt.Errorf("failed to list K0sControlPlanes: %w", err) + return err } for _, controlPlane := range controlPlanes { @@ -171,14 +176,11 @@ func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, cl // validateCSIDriver validates that the provider CSI driver is functioning // by creating a PVC and verifying it enters "Bound" status. 
func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { - clusterKC, err := kc.NewFromCluster(ctx, "default", clusterName) - if err != nil { - Fail(fmt.Sprintf("failed to create KubeClient for managed cluster %s: %v", clusterName, err)) - } + clusterKC := kc.NewFromCluster(ctx, "default", clusterName) pvcName := clusterName + "-csi-test-pvc" - _, err = clusterKC.Client.CoreV1().PersistentVolumeClaims(clusterKC.Namespace). + _, err := clusterKC.Client.CoreV1().PersistentVolumeClaims(clusterKC.Namespace). Create(ctx, &corev1.PersistentVolumeClaim{ ObjectMeta: metav1.ObjectMeta{ Name: pvcName, @@ -261,14 +263,11 @@ func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterNa // functional by creating a LoadBalancer service and verifying it is assigned // an external IP. func validateCCM(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { - clusterKC, err := kc.NewFromCluster(ctx, "default", clusterName) - if err != nil { - Fail(fmt.Sprintf("failed to create KubeClient for managed cluster %s: %v", clusterName, err)) - } + clusterKC := kc.NewFromCluster(ctx, "default", clusterName) createdServiceName := "loadbalancer-" + clusterName - _, err = clusterKC.Client.CoreV1().Services(clusterKC.Namespace).Create(ctx, &corev1.Service{ + _, err := clusterKC.Client.CoreV1().Services(clusterKC.Namespace).Create(ctx, &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: createdServiceName, }, diff --git a/test/utils/utils.go b/test/utils/utils.go index 4e0d767f4..de714b0fe 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -27,8 +27,31 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) -// Run executes the provided command within this context +// Run executes the provided command within this context and returns its +// standard output. Run blocks until the command has finished running. 
func Run(cmd *exec.Cmd) ([]byte, error) { + command := prepareCmd(cmd) + _, _ = fmt.Fprintf(GinkgoWriter, "running: %s\n", command) + + output, err := cmd.Output() + if err != nil { + return nil, handleCmdError(err, command) + } + + return output, nil +} + +func handleCmdError(err error, command string) error { + var exitError *exec.ExitError + + if errors.As(err, &exitError) { + return fmt.Errorf("%s failed with error: (%v): %s", command, err, string(exitError.Stderr)) + } + + return fmt.Errorf("%s failed with error: %w", command, err) +} + +func prepareCmd(cmd *exec.Cmd) string { dir, _ := GetProjectDir() cmd.Dir = dir @@ -37,19 +60,7 @@ func Run(cmd *exec.Cmd) ([]byte, error) { } cmd.Env = append(os.Environ(), "GO111MODULE=on") - command := strings.Join(cmd.Args, " ") - _, _ = fmt.Fprintf(GinkgoWriter, "running: %s\n", command) - - output, err := cmd.Output() - if err != nil { - var exitError *exec.ExitError - - if errors.As(err, &exitError) { - return output, fmt.Errorf("%s failed with error: (%v): %s", command, err, string(exitError.Stderr)) - } - } - - return output, nil + return strings.Join(cmd.Args, " ") } // LoadImageToKindCluster loads a local docker image to the kind cluster