jobs:
  # Builds the controller image and charts, pushes them to GHCR and runs the
  # E2E suite. Only runs for pull requests carrying the 'test e2e' label.
  e2etest:
    if: ${{ contains( github.event.pull_request.labels.*.name, 'test e2e') }}
    concurrency:
      group: test-e2e-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    name: E2E Tests
    runs-on: ubuntu-latest
    # The job pushes an image and charts to ghcr.io with GITHUB_TOKEN, which
    # requires package write access; declare it explicitly instead of relying
    # on the repository default.
    permissions:
      contents: read
      packages: write
    outputs:
      clustername: ${{ steps.vars.outputs.clustername }}
      version: ${{ steps.vars.outputs.version }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Full history so `git describe --tags` can resolve a version.
          fetch-depth: 0
      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
      - name: Set up Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GHCR
        uses: docker/login-action@v3.3.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Get outputs
        id: vars
        run: |
          echo "version=$(git describe --tags --always)" >> "$GITHUB_OUTPUT"
          echo "clustername=ci-$(date +%s)-e2e-test" >> "$GITHUB_OUTPUT"
      - name: Build and push HMC controller image
        uses: docker/build-push-action@v6
        with:
          build-args: |
            LD_FLAGS=-s -w -X github.com/Mirantis/hmc/internal/build.Version=${{ steps.vars.outputs.version }}
          context: .
          platforms: linux/amd64
          tags: |
            ghcr.io/mirantis/hmc/controller-ci:${{ steps.vars.outputs.version }}
          push: true
          cache-from: type=gha
          cache-to: type=gha,mode=max
      - name: Prepare and push HMC template charts
        run: |
          make hmc-chart-release
          REGISTRY_REPO="oci://ghcr.io/mirantis/hmc/charts-ci" make helm-push
      - name: Setup kubectl
        uses: azure/setup-kubectl@v4
      - name: Run E2E tests
        env:
          MANAGED_CLUSTER_NAME: ${{ steps.vars.outputs.clustername }}
          REGISTRY_REPO: 'oci://ghcr.io/mirantis/hmc/charts-ci'
          IMG: 'ghcr.io/mirantis/hmc/controller-ci:${{ steps.vars.outputs.version }}'
        run: |
          make test-e2e
      - name: Archive test results
        if: ${{ failure() }}
        uses: actions/upload-artifact@v4
        with:
          name: test-logs
          path: |
            test/e2e/*.log
  # Removes the AWS resources created for the cluster name generated by the
  # e2etest job.
  cleanup:
    name: Cleanup
    needs: e2etest
    runs-on: ubuntu-latest
    # always() keeps cleanup running after a failed or cancelled e2etest job.
    # The extra checks skip it when e2etest never ran (label missing) or ended
    # before a cluster name was generated, so `make dev-aws-nuke` is never
    # invoked with an empty CLUSTER_NAME.
    if: ${{ always() && needs.e2etest.result != 'skipped' && needs.e2etest.outputs.clustername != '' }}
    timeout-minutes: 15
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
      - name: AWS Test Resources
        env:
          CLUSTER_NAME: '${{ needs.e2etest.outputs.clustername }}'
        run: |
          make dev-aws-nuke
# Unit tests: every package except ./test/e2e, executed against the envtest
# control-plane binaries for ENVTEST_K8S_VERSION.
test: generate-all fmt vet envtest tidy external-crd ## Run tests.
	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" \
		go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out

# Utilize Kind or modify the e2e tests to load the image locally, enabling
# compatibility with other vendors.
.PHONY: test-e2e # Run the e2e tests using a Kind k8s instance as the management cluster.
test-e2e: cli-install
	KIND_CLUSTER_NAME="hmc-test" KIND_VERSION=$(KIND_VERSION) \
		go test ./test/e2e/ -v -ginkgo.v -timeout=2h
# dev-deploy rewrites config/dev/hmc_values.yaml in place so the chart uses the
# controller image given by IMG (split into IMG_REPO / IMG_TAG) and the chart
# registry given by REGISTRY_REPO, then deploys the chart and restarts the
# controller deployment.
#
# When REGISTRY_REPO is the local registry address (127.0.0.1:REGISTRY_PORT) the
# URL written to the values file uses REGISTRY_NAME on port 5000 instead.
# NOTE(review): presumably the in-cluster address of the same registry - confirm.
#
# Each yq edit is its own recipe step: a failed edit now aborts the target
# instead of being masked by the exit status of the deploy that follows it.
.PHONY: dev-deploy
dev-deploy: yq ## Deploy HMC helm chart to the K8s cluster specified in ~/.kube/config.
	@$(YQ) eval -i '.image.repository = "$(IMG_REPO)"' config/dev/hmc_values.yaml
	@$(YQ) eval -i '.image.tag = "$(IMG_TAG)"' config/dev/hmc_values.yaml
	@if [ "$(REGISTRY_REPO)" = "oci://127.0.0.1:$(REGISTRY_PORT)/charts" ]; then \
		$(YQ) eval -i '.controller.defaultRegistryURL = "oci://$(REGISTRY_NAME):5000/charts"' config/dev/hmc_values.yaml; \
	else \
		$(YQ) eval -i '.controller.defaultRegistryURL = "$(REGISTRY_REPO)"' config/dev/hmc_values.yaml; \
	fi
	$(MAKE) hmc-deploy HMC_VALUES=config/dev/hmc_values.yaml
	$(KUBECTL) rollout restart -n $(NAMESPACE) deployment/hmc-controller-manager
# dev-aws-nuke deletes AWS resources associated with CLUSTER_NAME. The recipe
# runs these steps in order:
#   1. scripts/aws-nuke-ccm.sh in "elb" mode.
#   2. Render config/dev/cloud_nuke.yaml from its .tpl with CLUSTER_NAME
#      substituted by envsubst.
#   3. Run cloud-nuke with that config, limited to the listed resource types.
#      AWS_REGION is read from the environment when the recipe runs ($$ escapes
#      the variable from make).
#   4. Remove the rendered config file.
#   5. scripts/aws-nuke-ccm.sh in "ebs" mode.
# NOTE(review): the ELB pass before and the EBS pass after cloud-nuke looks
# deliberate (dependency ordering between resources) - confirm before reordering.
.PHONY: dev-aws-nuke
dev-aws-nuke: envsubst awscli yq cloud-nuke ## Warning: Destructive! Nuke all AWS resources deployed by 'DEV_PROVIDER=aws dev-provider-apply', prefix with CLUSTER_NAME to nuke a specific cluster.
	@CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) AWSCLI=$(AWSCLI) bash -c "./scripts/aws-nuke-ccm.sh elb"
	@CLUSTER_NAME=$(CLUSTER_NAME) $(ENVSUBST) < config/dev/cloud_nuke.yaml.tpl > config/dev/cloud_nuke.yaml
	DISABLE_TELEMETRY=true $(CLOUDNUKE) aws --region $$AWS_REGION --force --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2,ec2-subnet,elb,elbv2,ebs,internet-gateway,network-interface,security-group
	@rm config/dev/cloud_nuke.yaml
	@CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) AWSCLI=$(AWSCLI) bash -c "./scripts/aws-nuke-ccm.sh ebs"

# cli-install is an aggregate target: it has no recipe of its own and only pulls
# in the individual tool-install targets listed as prerequisites.
.PHONY: cli-install
cli-install: clusterawsadm clusterctl cloud-nuke envsubst yq awscli ## Install the necessary CLI tools for deployment, development and testing.
# Installs the AWS CLI v2 into $(LOCALBIN) for the current OS.
#   linux  - downloads the versioned zip for this CPU architecture and runs the
#            bundled installer with --update.
#   darwin - downloads the .pkg and runs `installer` against $(LOCALBIN).
#            NOTE(review): this expects a choices.xml in the working directory;
#            nothing in this rule creates it - confirm where it comes from.
#   other  - fails with an explicit message instead of silently doing nothing.
# Steps are chained with && so a failed download or unzip stops the install,
# and `[ ... = ... ]` is used because `==` is not portable to POSIX sh.
.PHONY: awscli
awscli: $(AWSCLI)
$(AWSCLI): | $(LOCALBIN)
	@if [ "$(OS)" = "linux" ]; then \
		curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-$(shell uname -m)-$(AWSCLI_VERSION).zip" -o "/tmp/awscliv2.zip" && \
		unzip -qq -o /tmp/awscliv2.zip -d /tmp && \
		/tmp/aws/install -i $(LOCALBIN)/aws-cli -b $(LOCALBIN) --update; \
	elif [ "$(OS)" = "darwin" ]; then \
		curl -fsSL "https://awscli.amazonaws.com/AWSCLIV2.pkg" -o "AWSCLIV2.pkg" && \
		installer -pkg AWSCLIV2.pkg -target $(LOCALBIN) -applyChoiceChangesXML choices.xml; \
		status=$$?; \
		rm -f AWSCLIV2.pkg; \
		exit $$status; \
	else \
		echo "Installing to $(LOCALBIN) on $(OS) is not yet implemented" >&2; \
		exit 1; \
	fi
**VPC ID** @@ -89,7 +94,7 @@ Grab the following `ManagedCluster` manifest template and save it to a file name apiVersion: hmc.mirantis.com/v1alpha1 kind: ManagedCluster metadata: - name: aws-hosted-cp + name: aws-hosted spec: template: aws-hosted-cp config: @@ -109,3 +114,24 @@ Then run the following command to create the `managedcluster.yaml`: ``` kubectl get awscluster cluster -o go-template="$(cat managedcluster.yaml.tpl)" > managedcluster.yaml ``` +## Deployment Tips +* Ensure HMC templates and the controller image are somewhere public and + fetchable. +* For installing the HMC charts and templates from a custom repository, load + the `kubeconfig` from the cluster and run the commands: + +``` +KUBECONFIG=kubeconfig IMG="ghcr.io/mirantis/hmc/controller-ci:v0.0.1-179-ga5bdf29" REGISTRY_REPO="oci://ghcr.io/mirantis/hmc/charts-ci" make dev-apply +KUBECONFIG=kubeconfig make dev-templates +``` +* The infrastructure will need to be manually marked `Ready` to get the + `MachineDeployment` to scale up. You can patch the `AWSCluster` kind using + the command: + +``` +KUBECONFIG=kubeconfig kubectl patch AWSCluster <hosted-cluster-name> --type=merge --subresource status --patch 'status: {ready: true}' -n hmc-system +``` + +For additional information on why this is required, [click here](https://docs.k0smotron.io/stable/capi-aws/#:~:text=As%20we%20are%20using%20self%2Dmanaged%20infrastructure%20we%20need%20to%20manually%20mark%20the%20infrastructure%20ready.%20This%20can%20be%20accomplished%20using%20the%20following%20command). + + diff --git a/docs/dev.md b/docs/dev.md index c07b787a0..1bf47bad8 100644 --- a/docs/dev.md +++ b/docs/dev.md @@ -107,3 +107,34 @@ export KUBECONFIG=~/.kube/config kubectl --kubeconfig ~/.kube/config get secret -n hmc-system <managedcluster-name>-kubeconfig -o=jsonpath={.data.value} | base64 -d > kubeconfig ``` +## Running E2E tests locally +E2E tests can be run locally via the `make test-e2e` target. 
In order to have +CI properly deploy, a non-local registry will need to be used and the Helm charts +and hmc-controller image will need to exist on the registry, for example, using +GHCR: + +``` +IMG="ghcr.io/mirantis/hmc/controller-ci:v0.0.1-179-ga5bdf29" \ + REGISTRY_REPO="oci://ghcr.io/mirantis/hmc/charts-ci" \ + make test-e2e +``` + +Optionally, the `NO_CLEANUP=1` env var can be used to disable `After` nodes from +running within some specs; this will allow users to debug tests by re-running +them without the need to wait a while for an infrastructure deployment to occur. +For subsequent runs the `MANAGED_CLUSTER_NAME=<cluster-name>` env var should be +passed to tell the test what cluster name to use so that it does not try to +generate a new name and deploy a new cluster. + +Tests that run locally use autogenerated names like `12345678-e2e-test` while +tests that run in CI use names such as `ci-1234567890-e2e-test`. You can always +pass `MANAGED_CLUSTER_NAME=` from the get-go to customize the name used by the +test. + +### Nuke created resources +In CI we run `make dev-aws-nuke` to clean up test resources; you can do so +manually with: + +``` +CLUSTER_NAME=example-e2e-test make dev-aws-nuke +``` diff --git a/scripts/aws-nuke-ccm.sh b/scripts/aws-nuke-ccm.sh index 26e8a067c..df8c80d5b 100755 --- a/scripts/aws-nuke-ccm.sh +++ b/scripts/aws-nuke-ccm.sh @@ -33,28 +33,30 @@ if [ -z $AWSCLI ]; then exit 1 fi -echo "Checking for ELB with 'kubernetes.io/cluster/$CLUSTER_NAME' tag" -for LOADBALANCER in $($AWSCLI elb describe-load-balancers --output yaml | $YQ '.LoadBalancerDescriptions[].LoadBalancerName'); -do - echo "Checking ELB: $LOADBALANCER for 'kubernetes.io/cluster/$CLUSTER_NAME tag" - DESCRIBE_TAGS=$($AWSCLI elb describe-tags \ - --load-balancer-names $LOADBALANCER \ - --output yaml | $YQ '.TagDescriptions[].Tags.[]' | grep 'kubernetes.io/cluster/$CLUSTER_NAME') - if [ ! 
# Resource cleanup pass, selected by the first argument:
#   elb - delete classic ELBs whose tag descriptions mention $CLUSTER_NAME
#   ebs - delete EBS volumes whose 'kubernetes.io/created-for/pvc/name' tag
#         mentions $CLUSTER_NAME
# Reads CLUSTER_NAME, AWSCLI and YQ from the environment.

# An empty CLUSTER_NAME would make the tag filter below match every resource,
# so refuse to continue without one.
if [ -z "${CLUSTER_NAME}" ]; then
    echo "CLUSTER_NAME must be set" >&2
    exit 1
fi

case "${1:-}" in
    elb)
        echo "Checking for ELB with '$CLUSTER_NAME' tag"
        # $AWSCLI and $YQ are intentionally left unquoted so a value that
        # carries extra arguments keeps working as it did before.
        for LOADBALANCER in $($AWSCLI elb describe-load-balancers --output yaml | $YQ '.LoadBalancerDescriptions[].LoadBalancerName');
        do
            echo "Checking ELB: $LOADBALANCER for tag"
            # -F matches the cluster name literally rather than as a regex;
            # `--` protects against names that start with a dash.
            DESCRIBE_TAGS=$($AWSCLI elb describe-tags --load-balancer-names "$LOADBALANCER" --output yaml | $YQ '.TagDescriptions[]' | grep -F -- "$CLUSTER_NAME")
            if [ -n "${DESCRIBE_TAGS}" ]; then
                echo "Deleting ELB: $LOADBALANCER"
                $AWSCLI elb delete-load-balancer --load-balancer-name "$LOADBALANCER"
            fi
        done
        ;;
    ebs)
        echo "Checking for EBS Volumes with '$CLUSTER_NAME' within the 'kubernetes.io/created-for/pvc/name' tag"
        for VOLUME in $($AWSCLI ec2 describe-volumes --output yaml | $YQ '.Volumes[].VolumeId');
        do
            echo "Checking EBS Volume: $VOLUME for $CLUSTER_NAME claim"
            DESCRIBE_VOLUMES=$($AWSCLI ec2 describe-volumes \
                --volume-id "$VOLUME" \
                --output yaml | $YQ '.Volumes | to_entries[] | .value.Tags[] | select(.Key == "kubernetes.io/created-for/pvc/name")' | grep -F -- "$CLUSTER_NAME")
            if [ -n "${DESCRIBE_VOLUMES}" ]; then
                echo "Deleting EBS Volume: $VOLUME"
                $AWSCLI ec2 delete-volume --volume-id "$VOLUME"
            fi
        done
        ;;
    *)
        # A missing or misspelled mode used to fall through silently, which
        # leaves resources behind without any indication.
        echo "usage: $0 {elb|ebs}" >&2
        exit 1
        ;;
esac
}} annotations: cluster.x-k8s.io/managed-by: k0smotron + aws.cluster.x-k8s.io/external-resource-gc: "true" finalizers: - hmc.mirantis.com/cleanup spec: diff --git a/templates/cluster/aws-hosted-cp/templates/k0smotroncontrolplane.yaml b/templates/cluster/aws-hosted-cp/templates/k0smotroncontrolplane.yaml index 3a038baa9..5fb372752 100644 --- a/templates/cluster/aws-hosted-cp/templates/k0smotroncontrolplane.yaml +++ b/templates/cluster/aws-hosted-cp/templates/k0smotroncontrolplane.yaml @@ -46,7 +46,8 @@ spec: - --cluster-name={{ include "cluster.name" . }} # Removing the default `node-role.kubernetes.io/control-plane` node selector # TODO: it does not work - # nodeSelector: "" + nodeSelector: + node-role.kubernetes.io/control-plane: null - name: aws-ebs-csi-driver namespace: kube-system chartname: aws-ebs-csi-driver/aws-ebs-csi-driver diff --git a/templates/cluster/aws-hosted-cp/values.yaml b/templates/cluster/aws-hosted-cp/values.yaml index a9491e27f..3208000f8 100644 --- a/templates/cluster/aws-hosted-cp/values.yaml +++ b/templates/cluster/aws-hosted-cp/values.yaml @@ -4,10 +4,10 @@ workersNumber: 2 clusterNetwork: pods: cidrBlocks: - - "10.244.0.0/16" + - "10.244.0.0/16" services: cidrBlocks: - - "10.96.0.0/12" + - "10.96.0.0/12" # AWS cluster parameters vpcID: "" @@ -43,4 +43,4 @@ k0smotron: # K0s parameters k0s: - version: v1.30.2+k0s.0 + version: v1.30.4+k0s.0 diff --git a/templates/cluster/aws-standalone-cp/Chart.yaml b/templates/cluster/aws-standalone-cp/Chart.yaml index 3bb77479d..2d1da1cd7 100644 --- a/templates/cluster/aws-standalone-cp/Chart.yaml +++ b/templates/cluster/aws-standalone-cp/Chart.yaml @@ -6,12 +6,12 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. 
# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.2 +version: 0.1.3 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "1.30.2+k0s.0" +appVersion: "1.30.4+k0s.0" annotations: hmc.mirantis.com/infrastructure-providers: aws hmc.mirantis.com/controlplane-providers: k0s diff --git a/templates/cluster/aws-standalone-cp/templates/awscluster.yaml b/templates/cluster/aws-standalone-cp/templates/awscluster.yaml index f797a8c20..c199c7227 100644 --- a/templates/cluster/aws-standalone-cp/templates/awscluster.yaml +++ b/templates/cluster/aws-standalone-cp/templates/awscluster.yaml @@ -2,6 +2,8 @@ apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 kind: AWSCluster metadata: name: {{ include "cluster.name" . 
}} + annotations: + aws.cluster.x-k8s.io/external-resource-gc: "true" spec: region: {{ .Values.region }} # identityRef: diff --git a/templates/cluster/aws-standalone-cp/values.yaml b/templates/cluster/aws-standalone-cp/values.yaml index 741228064..68f75376b 100644 --- a/templates/cluster/aws-standalone-cp/values.yaml +++ b/templates/cluster/aws-standalone-cp/values.yaml @@ -5,10 +5,10 @@ workersNumber: 2 clusterNetwork: pods: cidrBlocks: - - "10.244.0.0/16" + - "10.244.0.0/16" services: cidrBlocks: - - "10.96.0.0/12" + - "10.96.0.0/12" # AWS cluster parameters region: "" @@ -43,8 +43,8 @@ worker: # K0s parameters k0s: - version: v1.30.2+k0s.0 + version: v1.30.4+k0s.0 -# Optionally install applications defined under +# Optionally install applications defined under # templates/beachheadservices into target cluster installBeachHeadServices: false diff --git a/templates/provider/cluster-api-provider-aws/Chart.yaml b/templates/provider/cluster-api-provider-aws/Chart.yaml index 022a29335..f4d396f2f 100644 --- a/templates/provider/cluster-api-provider-aws/Chart.yaml +++ b/templates/provider/cluster-api-provider-aws/Chart.yaml @@ -13,7 +13,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.1 +version: 0.1.2 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. 
diff --git a/templates/provider/cluster-api-provider-aws/templates/provider.yaml b/templates/provider/cluster-api-provider-aws/templates/provider.yaml index d0e0b14cb..605bd141d 100644 --- a/templates/provider/cluster-api-provider-aws/templates/provider.yaml +++ b/templates/provider/cluster-api-provider-aws/templates/provider.yaml @@ -9,3 +9,6 @@ spec: name: {{ .Values.configSecret.name }} namespace: {{ .Values.configSecret.namespace | default .Release.Namespace | trunc 63 }} {{- end }} + manager: + featureGates: + ExternalResourceGC: true diff --git a/templates/provider/hmc-templates/files/templates/aws-hosted-cp.yaml b/templates/provider/hmc-templates/files/templates/aws-hosted-cp.yaml index 34254aacb..d2aa2b417 100644 --- a/templates/provider/hmc-templates/files/templates/aws-hosted-cp.yaml +++ b/templates/provider/hmc-templates/files/templates/aws-hosted-cp.yaml @@ -5,4 +5,4 @@ metadata: spec: helm: chartName: aws-hosted-cp - chartVersion: 0.1.2 + chartVersion: 0.1.3 diff --git a/templates/provider/hmc-templates/files/templates/aws-standalone-cp.yaml b/templates/provider/hmc-templates/files/templates/aws-standalone-cp.yaml index 5f6d1b55d..d26c386e3 100644 --- a/templates/provider/hmc-templates/files/templates/aws-standalone-cp.yaml +++ b/templates/provider/hmc-templates/files/templates/aws-standalone-cp.yaml @@ -5,4 +5,4 @@ metadata: spec: helm: chartName: aws-standalone-cp - chartVersion: 0.1.2 + chartVersion: 0.1.3 diff --git a/templates/provider/hmc-templates/files/templates/cluster-api-provider-aws.yaml b/templates/provider/hmc-templates/files/templates/cluster-api-provider-aws.yaml index 6e3bdd67b..df4de06c4 100644 --- a/templates/provider/hmc-templates/files/templates/cluster-api-provider-aws.yaml +++ b/templates/provider/hmc-templates/files/templates/cluster-api-provider-aws.yaml @@ -5,4 +5,4 @@ metadata: spec: helm: chartName: cluster-api-provider-aws - chartVersion: 0.1.1 + chartVersion: 0.1.2 diff --git a/templates/provider/hmc/values.yaml 
b/templates/provider/hmc/values.yaml index 064f4c3f1..6f297a780 100644 --- a/templates/provider/hmc/values.yaml +++ b/templates/provider/hmc/values.yaml @@ -8,7 +8,6 @@ admissionWebhook: controller: defaultRegistryURL: "oci://ghcr.io/mirantis/hmc/charts" - defaultRepoType: "oci" registryCredsSecret: "" insecureRegistry: false createManagement: true diff --git a/test/e2e/controller.go b/test/e2e/controller.go new file mode 100644 index 000000000..8b8bdf244 --- /dev/null +++ b/test/e2e/controller.go @@ -0,0 +1,86 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package e2e + +import ( + "context" + "fmt" + "strings" + + "github.com/Mirantis/hmc/test/kubeclient" + "github.com/Mirantis/hmc/test/managedcluster" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + hmcControllerLabel = "app.kubernetes.io/name=hmc" +) + +// verifyControllersUp validates that controllers for the given providers list +// are running and ready. Optionally specify providers to check for rather than +// waiting for all providers to be ready. 
+func verifyControllersUp(kc *kubeclient.KubeClient, providers ...managedcluster.ProviderType) error { + if err := validateController(kc, hmcControllerLabel, "hmc-controller-manager"); err != nil { + return err + } + + if providers == nil { + providers = []managedcluster.ProviderType{ + managedcluster.ProviderCAPI, + managedcluster.ProviderAWS, + managedcluster.ProviderAzure, + } + } + + for _, provider := range providers { + // Ensure only one controller pod is running. + if err := validateController(kc, managedcluster.GetProviderLabel(provider), string(provider)); err != nil { + return err + } + } + + return nil +} + +func validateController(kc *kubeclient.KubeClient, labelSelector string, name string) error { + deployList, err := kc.Client.AppsV1().Deployments(kc.Namespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return fmt.Errorf("failed to list %s controller deployments: %w", name, err) + } + + if len(deployList.Items) < 1 { + return fmt.Errorf("expected at least 1 %s controller deployment, got %d", + name, len(deployList.Items)) + } + + deployment := deployList.Items[0] + + // Ensure the deployment is not being deleted. + if deployment.DeletionTimestamp != nil { + return fmt.Errorf("controller pod: %s deletion timestamp should be nil, got: %v", + deployment.Name, deployment.DeletionTimestamp) + } + // Ensure the deployment is running and has the expected name. 
+ if !strings.Contains(deployment.Name, "controller-manager") { + return fmt.Errorf("controller deployment name %s does not contain 'controller-manager'", deployment.Name) + } + if deployment.Status.ReadyReplicas < 1 { + return fmt.Errorf("controller deployment: %s does not yet have any ReadyReplicas", deployment.Name) + } + + return nil +} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 70d463575..c0e06347a 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -18,6 +18,7 @@ import ( "bufio" "context" "fmt" + "net/url" "os" "os/exec" "path/filepath" @@ -32,13 +33,13 @@ import ( "github.com/Mirantis/hmc/test/kubeclient" "github.com/Mirantis/hmc/test/managedcluster" + "github.com/Mirantis/hmc/test/managedcluster/aws" + "github.com/Mirantis/hmc/test/managedcluster/vsphere" "github.com/Mirantis/hmc/test/utils" - vsphereutils "github.com/Mirantis/hmc/test/utils/vsphere" ) const ( - namespace = "hmc-system" - hmcControllerLabel = "app.kubernetes.io/name=hmc" + namespace = "hmc-system" ) var _ = Describe("controller", Ordered, func() { @@ -50,120 +51,203 @@ var _ = Describe("controller", Ordered, func() { }) AfterAll(func() { - By("removing the controller-manager") - cmd := exec.Command("make", "dev-destroy") - _, err := utils.Run(cmd) - Expect(err).NotTo(HaveOccurred()) + if !noCleanup() { + By("removing the controller-manager") + cmd := exec.Command("make", "dev-destroy") + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) + } }) Context("Operator", func() { It("should run successfully", func() { - kc, err := kubeclient.NewFromLocal(namespace) - ExpectWithOffset(1, err).NotTo(HaveOccurred()) - ExpectWithOffset(1, kc.CreateAWSCredentialsKubeSecret(context.Background())).To(Succeed()) + kc := kubeclient.NewFromLocal(namespace) + aws.CreateCredentialSecret(context.Background(), kc) By("validating that the hmc-controller and capi provider controllers are running") - verifyControllersUp := func() error { - if err := verifyControllerUp(kc, 
hmcControllerLabel, "hmc-controller-manager"); err != nil { - return err - } - - for _, provider := range []managedcluster.ProviderType{ - managedcluster.ProviderCAPI, - managedcluster.ProviderAWS, - managedcluster.ProviderAzure, - managedcluster.ProviderVSphere, - } { - // Ensure only one controller pod is running. - if err := verifyControllerUp(kc, managedcluster.GetProviderLabel(provider), string(provider)); err != nil { - return err - } - } - - return nil - } Eventually(func() error { - err := verifyControllersUp() + err := verifyControllersUp(kc) if err != nil { - _, _ = fmt.Fprintf(GinkgoWriter, "Controller pod validation failed: %v\n", err) + _, _ = fmt.Fprintf(GinkgoWriter, "Controller validation failed: %v\n", err) return err } - return nil }).WithTimeout(15 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) }) }) - Context("AWS Templates", func() { + Describe("AWS Templates", func() { var ( - kc *kubeclient.KubeClient - deleteFunc func() error - clusterName string - err error + kc *kubeclient.KubeClient + standaloneClient *kubeclient.KubeClient + standaloneDeleteFunc func() error + hostedDeleteFunc func() error + kubecfgDeleteFunc func() error + clusterName string ) BeforeAll(func() { By("ensuring AWS credentials are set") - kc, err = kubeclient.NewFromLocal(namespace) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) - ExpectWithOffset(2, kc.CreateAWSCredentialsKubeSecret(context.Background())).To(Succeed()) + kc = kubeclient.NewFromLocal(namespace) + aws.CreateCredentialSecret(context.Background(), kc) }) AfterEach(func() { // If we failed collect logs from each of the affiliated controllers // as well as the output of clusterctl to store as artifacts. 
- if CurrentSpecReport().Failed() { + if CurrentSpecReport().Failed() && !noCleanup() { By("collecting failure logs from controllers") - collectLogArtifacts(kc, clusterName, managedcluster.ProviderAWS, managedcluster.ProviderCAPI) - } + if kc != nil { + collectLogArtifacts(kc, clusterName, managedcluster.ProviderAWS, managedcluster.ProviderCAPI) + } + if standaloneClient != nil { + collectLogArtifacts(standaloneClient, clusterName, managedcluster.ProviderAWS, managedcluster.ProviderCAPI) + } - // Delete the deployments if they were created. - if deleteFunc != nil { - By("deleting the deployment") - err = deleteFunc() - Expect(err).NotTo(HaveOccurred()) + By("deleting resources after failure") + for _, deleteFunc := range []func() error{ + kubecfgDeleteFunc, + hostedDeleteFunc, + standaloneDeleteFunc, + } { + if deleteFunc != nil { + err := deleteFunc() + Expect(err).NotTo(HaveOccurred()) + } + } } + }) - // Purge the AWS resources, the AfterAll for the controller will - // clean up the management cluster. - By("nuking remaining AWS resources") - err = os.Setenv("CLUSTER_NAME", clusterName) - Expect(err).NotTo(HaveOccurred()) - cmd := exec.Command("make", "dev-aws-nuke") + It("should work with an AWS provider", func() { + // Deploy a standalone cluster and verify it is running/ready. + // Deploy standalone with an xlarge instance since it will also be + // hosting the hosted cluster. 
+ GinkgoT().Setenv(managedcluster.EnvVarAWSInstanceType, "t3.xlarge") + GinkgoT().Setenv(managedcluster.EnvVarInstallBeachHeadServices, "false") + + templateBy(managedcluster.TemplateAWSStandaloneCP, "creating a ManagedCluster") + sd := managedcluster.GetUnstructured(managedcluster.TemplateAWSStandaloneCP) + clusterName = sd.GetName() + + standaloneDeleteFunc = kc.CreateManagedCluster(context.Background(), sd) + + templateBy(managedcluster.TemplateAWSStandaloneCP, "waiting for infrastructure to deploy successfully") + deploymentValidator := managedcluster.NewProviderValidator( + managedcluster.TemplateAWSStandaloneCP, + clusterName, + managedcluster.ValidationActionDeploy, + ) + + Eventually(func() error { + return deploymentValidator.Validate(context.Background(), kc) + }).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + + templateBy(managedcluster.TemplateAWSHostedCP, "installing controller and templates on standalone cluster") + + // Download the KUBECONFIG for the standalone cluster and load it + // so we can call Make targets against this cluster. + // TODO: Ideally we shouldn't use Make here and should just convert + // these Make targets into Go code, but this will require a + // helmclient. + var kubeCfgPath string + kubeCfgPath, kubecfgDeleteFunc = kc.WriteKubeconfig(context.Background(), clusterName) + + GinkgoT().Setenv("KUBECONFIG", kubeCfgPath) + cmd := exec.Command("make", "dev-deploy") _, err := utils.Run(cmd) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) - }) + Expect(err).NotTo(HaveOccurred()) + cmd = exec.Command("make", "dev-templates") + _, err = utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) + Expect(os.Unsetenv("KUBECONFIG")).To(Succeed()) + + // Ensure AWS credentials are set in the standalone cluster. 
+ standaloneClient = kc.NewFromCluster(context.Background(), namespace, clusterName) + aws.CreateCredentialSecret(context.Background(), standaloneClient) - for _, template := range []managedcluster.Template{ - managedcluster.TemplateAWSStandaloneCP, - managedcluster.TemplateAWSHostedCP, - } { - It(fmt.Sprintf("should work with an AWS provider and %s template", template), func() { - if template == managedcluster.TemplateAWSHostedCP { - // TODO: Create AWS resources for hosted control plane. - Skip("AWS hosted control plane not yet implemented") + templateBy(managedcluster.TemplateAWSHostedCP, "validating that the controller is ready") + Eventually(func() error { + err := verifyControllersUp(standaloneClient, managedcluster.ProviderCAPI, managedcluster.ProviderAWS) + if err != nil { + _, _ = fmt.Fprintf( + GinkgoWriter, "[%s] controller validation failed: %v\n", + string(managedcluster.TemplateAWSHostedCP), err) + return err } + return nil + }).WithTimeout(15 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) - By("creating a Deployment") - d := managedcluster.GetUnstructured(template) - clusterName = d.GetName() + // Populate the environment variables required for the hosted + // cluster. + aws.PopulateHostedTemplateVars(context.Background(), kc) - deleteFunc, err = kc.CreateManagedCluster(context.Background(), d) - Expect(err).NotTo(HaveOccurred()) + templateBy(managedcluster.TemplateAWSHostedCP, "creating a ManagedCluster") + hd := managedcluster.GetUnstructured(managedcluster.TemplateAWSHostedCP) + hdName := hd.GetName() - By("waiting for infrastructure providers to deploy successfully") - Eventually(func() error { - return managedcluster.VerifyProviderDeployed(context.Background(), kc, clusterName, managedcluster.ProviderAWS) - }).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + // Deploy the hosted cluster on top of the standalone cluster. 
+ hostedDeleteFunc = standaloneClient.CreateManagedCluster(context.Background(), hd) - By("verify the deployment deletes successfully") - err = deleteFunc() + // Patch the AWSCluster resource as Ready, see: + // https://docs.k0smotron.io/stable/capi-aws/#prepare-the-aws-infra-provider + // Use Eventually as the AWSCluster might not be available + // immediately. + templateBy(managedcluster.TemplateAWSHostedCP, "Patching AWSCluster to ready") + Eventually(func() error { + if err := aws.PatchAWSClusterReady(context.Background(), standaloneClient, hd.GetName()); err != nil { + _, _ = fmt.Fprintf(GinkgoWriter, "failed to patch AWSCluster to ready: %v, retrying...\n", err) + return err + } + _, _ = fmt.Fprintf(GinkgoWriter, "Patch succeeded\n") + return nil + }).WithTimeout(time.Minute).WithPolling(5 * time.Second).Should(Succeed()) + + // Verify the hosted cluster is running/ready. + templateBy(managedcluster.TemplateAWSHostedCP, "waiting for infrastructure to deploy successfully") + deploymentValidator = managedcluster.NewProviderValidator( + managedcluster.TemplateAWSHostedCP, + hdName, + managedcluster.ValidationActionDeploy, + ) + Eventually(func() error { + return deploymentValidator.Validate(context.Background(), standaloneClient) + }).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + + // Delete the hosted ManagedCluster and verify it is removed. 
+ templateBy(managedcluster.TemplateAWSHostedCP, "deleting the ManagedCluster") + err = hostedDeleteFunc() + Expect(err).NotTo(HaveOccurred()) + + deletionValidator := managedcluster.NewProviderValidator( + managedcluster.TemplateAWSHostedCP, + hdName, + managedcluster.ValidationActionDelete, + ) + Eventually(func() error { + return deletionValidator.Validate(context.Background(), standaloneClient) + }).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + + // Now delete the standalone ManagedCluster and verify it is + // removed, it is deleted last since it is the basis for the hosted + // cluster. + /* + FIXME(#339): This is currently disabled as the deletion of the + standalone cluster is failing due to outstanding issues. + templateBy(managedcluster.TemplateAWSStandaloneCP, "deleting the ManagedCluster") + err = standaloneDeleteFunc() Expect(err).NotTo(HaveOccurred()) + + deletionValidator = managedcluster.NewProviderValidator( + managedcluster.TemplateAWSStandaloneCP, + clusterName, + managedcluster.ValidationActionDelete, + ) Eventually(func() error { - return managedcluster.VerifyProviderDeleted(context.Background(), kc, clusterName) - }).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) - }) - } + return deletionValidator.Validate(context.Background(), kc) + }).WithTimeout(10 * time.Minute).WithPolling(10 * + time.Second).Should(Succeed()) + */ + }) }) Context("vSphere templates", func() { @@ -182,15 +266,14 @@ var _ = Describe("controller", Ordered, func() { } By("ensuring that env vars are set correctly") - Expect(vsphereutils.CheckEnv()).Should(Succeed()) + vsphere.CheckEnv() By("creating kube client") - kc, err = kubeclient.NewFromLocal(namespace) - Expect(err).NotTo(HaveOccurred()) + kc := kubeclient.NewFromLocal(namespace) By("providing cluster identity") credSecretName := "vsphere-cluster-identity-secret-e2e" clusterIdentityName := "vsphere-cluster-identity-e2e" - 
Expect(kc.CreateVSphereSecret(credSecretName)).Should(Succeed()) - Expect(kc.CreateVSphereClusterIdentity(credSecretName, clusterIdentityName)).Should(Succeed()) + Expect(vsphere.CreateSecret(kc, credSecretName)).Should(Succeed()) + Expect(vsphere.CreateClusterIdentity(kc, credSecretName, clusterIdentityName)).Should(Succeed()) By("setting VSPHERE_CLUSTER_IDENTITY env variable") Expect(os.Setenv("VSPHERE_CLUSTER_IDENTITY", clusterIdentityName)).Should(Succeed()) }) @@ -216,54 +299,39 @@ var _ = Describe("controller", Ordered, func() { d := managedcluster.GetUnstructured(managedcluster.TemplateVSphereStandaloneCP) clusterName = d.GetName() - deleteFunc, err = kc.CreateManagedCluster(context.Background(), d) - Expect(err).NotTo(HaveOccurred()) + deleteFunc := kc.CreateManagedCluster(context.Background(), d) By("waiting for infrastructure providers to deploy successfully") + deploymentValidator := managedcluster.NewProviderValidator( + managedcluster.TemplateVSphereStandaloneCP, + clusterName, + managedcluster.ValidationActionDeploy, + ) Eventually(func() error { - return managedcluster.VerifyProviderDeployed(context.Background(), kc, clusterName, managedcluster.ProviderVSphere) + return deploymentValidator.Validate(context.Background(), kc) }).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + deletionValidator := managedcluster.NewProviderValidator( + managedcluster.TemplateVSphereStandaloneCP, + clusterName, + managedcluster.ValidationActionDelete, + ) By("verify the deployment deletes successfully") err = deleteFunc() Expect(err).NotTo(HaveOccurred()) Eventually(func() error { - return managedcluster.VerifyProviderDeleted(context.Background(), kc, clusterName) + return deletionValidator.Validate(context.Background(), kc) }).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) }) }) }) -func verifyControllerUp(kc *kubeclient.KubeClient, labelSelector string, name string) error { - deployList, err := 
kc.Client.AppsV1().Deployments(kc.Namespace).List(context.Background(), metav1.ListOptions{ - LabelSelector: labelSelector, - }) - if err != nil { - return fmt.Errorf("failed to list %s controller deployments: %w", name, err) - } - - if len(deployList.Items) < 1 { - return fmt.Errorf("expected at least 1 %s controller deployment, got %d", - name, len(deployList.Items)) - } - - deployment := deployList.Items[0] - - // Ensure the deployment is not being deleted. - if deployment.DeletionTimestamp != nil { - return fmt.Errorf("controller pod: %s deletion timestamp should be nil, got: %v", - deployment.Name, deployment.DeletionTimestamp) - } - // Ensure the deployment is running and has the expected name. - if !strings.Contains(deployment.Name, "controller-manager") { - return fmt.Errorf("controller deployment name %s does not contain 'controller-manager'", deployment.Name) - } - if deployment.Status.ReadyReplicas < 1 { - return fmt.Errorf("controller deployment: %s does not yet have any ReadyReplicas", deployment.Name) - } - - return nil +// templateBy wraps a Ginkgo By with a block describing the template being +// tested. 
+func templateBy(t managedcluster.Template, description string) { + GinkgoHelper() + By(fmt.Sprintf("[%s] %s", t, description)) } // collectLogArtfiacts collects log output from each the HMC controller, @@ -275,6 +343,14 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider filterLabels := []string{hmcControllerLabel} + var host string + hostURL, err := url.Parse(kc.Config.Host) + if err != nil { + utils.WarnError(fmt.Errorf("failed to parse host from kubeconfig: %w", err)) + } else { + host = strings.ReplaceAll(hostURL.Host, ":", "_") + } + for _, providerType := range providerTypes { filterLabels = append(filterLabels, managedcluster.GetProviderLabel(providerType)) } @@ -295,7 +371,7 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider } defer podLogs.Close() //nolint:errcheck - output, err := os.Create(fmt.Sprintf("./test/e2e/%s.log", pod.Name)) + output, err := os.Create(fmt.Sprintf("./test/e2e/%s.log", host+"-"+pod.Name)) if err != nil { utils.WarnError(fmt.Errorf("failed to create log file for pod %s: %w", pod.Name, err)) continue @@ -318,8 +394,17 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider return } - err = os.WriteFile(filepath.Join("test/e2e", "clusterctl.log"), output, 0644) + err = os.WriteFile(filepath.Join("test/e2e", host+"-"+"clusterctl.log"), output, 0644) if err != nil { utils.WarnError(fmt.Errorf("failed to write clusterctl log: %w", err)) } } + +func noCleanup() bool { + noCleanup := os.Getenv(managedcluster.EnvVarNoCleanup) + if noCleanup != "" { + By(fmt.Sprintf("skipping After nodes as %s is set", managedcluster.EnvVarNoCleanup)) + } + + return noCleanup != "" +} diff --git a/test/kubeclient/kubeclient.go b/test/kubeclient/kubeclient.go index 7f9cae978..dff04c58f 100644 --- a/test/kubeclient/kubeclient.go +++ b/test/kubeclient/kubeclient.go @@ -18,12 +18,10 @@ import ( "context" "fmt" "os" - "os/exec" "path/filepath" - 
"github.com/Mirantis/hmc/test/utils" . "github.com/onsi/ginkgo/v2" - corev1 "k8s.io/api/core/v1" + . "github.com/onsi/gomega" apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -35,10 +33,6 @@ import ( "k8s.io/client-go/tools/clientcmd" ) -const ( - awsCredentialsSecretName = "aws-variables" -) - type KubeClient struct { Namespace string @@ -49,213 +43,126 @@ type KubeClient struct { // NewFromLocal creates a new instance of KubeClient from a given namespace // using the locally found kubeconfig. -func NewFromLocal(namespace string) (*KubeClient, error) { - configBytes, err := getLocalKubeConfig() - if err != nil { - return nil, fmt.Errorf("failed to get local kubeconfig: %w", err) - } - - return new(configBytes, namespace) +func NewFromLocal(namespace string) *KubeClient { + GinkgoHelper() + return new(getLocalKubeConfig(), namespace) } // NewFromCluster creates a new KubeClient using the kubeconfig stored in the // secret affiliated with the given clusterName. Since it relies on fetching // the kubeconfig from secret it needs an existing kubeclient. -func (kc *KubeClient) NewFromCluster(ctx context.Context, namespace, clusterName string) (*KubeClient, error) { - secret, err := kc.Client.CoreV1().Secrets(kc.Namespace).Get(ctx, clusterName+"-kubeconfig", metav1.GetOptions{}) - if err != nil { - return nil, fmt.Errorf("failed to get cluster: %q kubeconfig secret: %w", clusterName, err) +func (kc *KubeClient) NewFromCluster(ctx context.Context, namespace, clusterName string) *KubeClient { + GinkgoHelper() + return new(kc.getKubeconfigSecretData(ctx, clusterName), namespace) +} + +// WriteKubeconfig writes the kubeconfig for the given clusterName to the +// test/e2e directory returning the path to the file and a function to delete +// it later. 
+func (kc *KubeClient) WriteKubeconfig(ctx context.Context, clusterName string) (string, func() error) { + GinkgoHelper() + + secretData := kc.getKubeconfigSecretData(ctx, clusterName) + + dir, err := os.Getwd() + Expect(err).NotTo(HaveOccurred()) + + path := filepath.Join(dir, clusterName+"-kubeconfig") + + Expect( + os.WriteFile(path, secretData, 0600)). + To(Succeed()) + + deleteFunc := func() error { + return os.Remove(path) } + return path, deleteFunc +} + +func (kc *KubeClient) getKubeconfigSecretData(ctx context.Context, clusterName string) []byte { + GinkgoHelper() + + secret, err := kc.Client.CoreV1().Secrets(kc.Namespace).Get(ctx, clusterName+"-kubeconfig", metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred(), "failed to get cluster: %q kubeconfig secret", clusterName) + secretData, ok := secret.Data["value"] - if !ok { - return nil, fmt.Errorf("kubeconfig secret %q has no 'value' key", clusterName) - } + Expect(ok).To(BeTrue(), "kubeconfig secret %q has no 'value' key", clusterName) - return new(secretData, namespace) + return secretData } // getLocalKubeConfig returns the kubeconfig file content. -func getLocalKubeConfig() ([]byte, error) { +func getLocalKubeConfig() []byte { + GinkgoHelper() + // Use the KUBECONFIG environment variable if it is set, otherwise use the // default path. kubeConfig, ok := os.LookupEnv("KUBECONFIG") if !ok { homeDir, err := os.UserHomeDir() - if err != nil { - return nil, fmt.Errorf("failed to get user home directory: %w", err) - } + Expect(err).NotTo(HaveOccurred(), "failed to get user home directory") kubeConfig = filepath.Join(homeDir, ".kube", "config") } configBytes, err := os.ReadFile(kubeConfig) - if err != nil { - return nil, fmt.Errorf("failed to read %q: %w", kubeConfig, err) - } + Expect(err).NotTo(HaveOccurred(), "failed to read %q", kubeConfig) - return configBytes, nil + return configBytes } // new creates a new instance of KubeClient from a given namespace using // the local kubeconfig.
-func new(configBytes []byte, namespace string) (*KubeClient, error) { +func new(configBytes []byte, namespace string) *KubeClient { + GinkgoHelper() + config, err := clientcmd.RESTConfigFromKubeConfig(configBytes) - if err != nil { - return nil, fmt.Errorf("failed to parse kubeconfig: %w", err) - } + Expect(err).NotTo(HaveOccurred(), "failed to parse kubeconfig") clientSet, err := kubernetes.NewForConfig(config) - if err != nil { - return nil, fmt.Errorf("could not initialize kubernetes client: %w", err) - } + Expect(err).NotTo(HaveOccurred(), "failed to initialize kubernetes client") extendedClientSet, err := apiextensionsclientset.NewForConfig(config) - if err != nil { - return nil, fmt.Errorf("failed to initialize apiextensions clientset: %w", err) - } + Expect(err).NotTo(HaveOccurred(), "failed to initialize apiextensions clientset") return &KubeClient{ Namespace: namespace, Client: clientSet, ExtendedClient: extendedClientSet, Config: config, - }, nil -} - -// CreateAWSCredentialsKubeSecret uses clusterawsadm to encode existing AWS -// credentials and create a secret in the given namespace if one does not -// already exist. 
-func (kc *KubeClient) CreateAWSCredentialsKubeSecret(ctx context.Context) error { - _, err := kc.Client.CoreV1().Secrets(kc.Namespace).Get(ctx, awsCredentialsSecretName, metav1.GetOptions{}) - if !apierrors.IsNotFound(err) { - return nil - } - - cmd := exec.Command("./bin/clusterawsadm", "bootstrap", "credentials", "encode-as-profile") - output, err := utils.Run(cmd) - if err != nil { - return fmt.Errorf("failed to encode AWS credentials with clusterawsadm: %w", err) - } - - _, err = kc.Client.CoreV1().Secrets(kc.Namespace).Create(ctx, &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: awsCredentialsSecretName, - }, - Data: map[string][]byte{ - "AWS_B64ENCODED_CREDENTIALS": output, - }, - Type: corev1.SecretTypeOpaque, - }, metav1.CreateOptions{}) - if err != nil { - return fmt.Errorf("failed to create AWS credentials secret: %w", err) - } - - return nil -} - -func (kc *KubeClient) CreateVSphereSecret(secretName string) error { - ctx := context.Background() - _, err := kc.Client.CoreV1().Secrets(kc.Namespace).Get(ctx, secretName, metav1.GetOptions{}) - - if !apierrors.IsNotFound(err) { - return nil } - username := os.Getenv("VSPHERE_USER") - password := os.Getenv("VSPHERE_PASSWORD") - - _, err = kc.Client.CoreV1().Secrets(kc.Namespace).Create(ctx, &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: secretName, - }, - StringData: map[string]string{ - "username": username, - "password": password, - }, - Type: corev1.SecretTypeOpaque, - }, metav1.CreateOptions{}) - - if err != nil { - return fmt.Errorf("failed to create vSphere credentials secret: %w", err) - } - - return nil -} - -func (kc *KubeClient) CreateVSphereClusterIdentity(secretName string, identityName string) error { - ctx := context.Background() - client, err := dynamic.NewForConfig(kc.Config) - - if err != nil { - return fmt.Errorf("failed to create dynamic client: %w", err) - } - - gvr := schema.GroupVersionResource{ - Group: "infrastructure.cluster.x-k8s.io", - Version: "v1beta1", - 
Resource: "vsphereclusteridentities", - } - - clusterIdentity := &unstructured.Unstructured{ - Object: map[string]interface{}{ - "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", - "kind": "VSphereClusterIdentity", - "metadata": map[string]interface{}{ - "name": identityName, - }, - "spec": map[string]interface{}{ - "secretName": secretName, - "allowedNamespaces": map[string]interface{}{ - "selector": map[string]interface{}{ - "matchLabels": map[string]interface{}{}, - }, - }, - }, - }, - } - - result, err := client.Resource(gvr).Create(ctx, clusterIdentity, metav1.CreateOptions{}) - if err != nil { - fmt.Printf("%+v", result) - return fmt.Errorf("Failed to create vsphereclusteridentity: %w", err) - } - - return nil } // GetDynamicClient returns a dynamic client for the given GroupVersionResource. -func (kc *KubeClient) GetDynamicClient(gvr schema.GroupVersionResource) (dynamic.ResourceInterface, error) { +func (kc *KubeClient) GetDynamicClient(gvr schema.GroupVersionResource) dynamic.ResourceInterface { + GinkgoHelper() + client, err := dynamic.NewForConfig(kc.Config) - if err != nil { - return nil, fmt.Errorf("failed to create dynamic client: %w", err) - } + Expect(err).NotTo(HaveOccurred(), "failed to create dynamic client") - return client.Resource(gvr).Namespace(kc.Namespace), nil + return client.Resource(gvr).Namespace(kc.Namespace) } -// CreateDeployment creates a managedcluster.hmc.mirantis.com in the given +// CreateManagedCluster creates a managedcluster.hmc.mirantis.com in the given // namespace and returns a DeleteFunc to clean up the deployment. // The DeleteFunc is a no-op if the deployment has already been deleted. 
func (kc *KubeClient) CreateManagedCluster( - ctx context.Context, managedcluster *unstructured.Unstructured) (func() error, error) { - kind := managedcluster.GetKind() + ctx context.Context, managedcluster *unstructured.Unstructured) func() error { + GinkgoHelper() - if kind != "ManagedCluster" { - return nil, fmt.Errorf("expected kind ManagedCluster, got: %s", kind) - } + kind := managedcluster.GetKind() + Expect(kind).To(Equal("ManagedCluster")) - client, err := kc.GetDynamicClient(schema.GroupVersionResource{ + client := kc.GetDynamicClient(schema.GroupVersionResource{ Group: "hmc.mirantis.com", Version: "v1alpha1", Resource: "managedclusters", }) - if err != nil { - return nil, fmt.Errorf("failed to get dynamic client: %w", err) - } - _, err = client.Create(ctx, managedcluster, metav1.CreateOptions{}) - if err != nil { - return nil, fmt.Errorf("failed to create Deployment: %w", err) + _, err := client.Create(ctx, managedcluster, metav1.CreateOptions{}) + if !apierrors.IsAlreadyExists(err) { + Expect(err).NotTo(HaveOccurred(), "failed to create %s", kind) } return func() error { @@ -264,7 +171,7 @@ func (kc *KubeClient) CreateManagedCluster( return nil } return err - }, nil + } } // GetCluster returns a Cluster resource by name. @@ -275,10 +182,7 @@ func (kc *KubeClient) GetCluster(ctx context.Context, clusterName string) (*unst Resource: "clusters", } - client, err := kc.GetDynamicClient(gvr) - if err != nil { - Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err)) - } + client := kc.GetDynamicClient(gvr) cluster, err := client.Get(ctx, clusterName, metav1.GetOptions{}) if err != nil { @@ -292,16 +196,13 @@ func (kc *KubeClient) GetCluster(ctx context.Context, clusterName string) (*unst // affiliated with the given clusterName. 
func (kc *KubeClient) listResource( ctx context.Context, gvr schema.GroupVersionResource, clusterName string) ([]unstructured.Unstructured, error) { - client, err := kc.GetDynamicClient(gvr) - if err != nil { - Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err)) - } + client := kc.GetDynamicClient(gvr) resources, err := client.List(ctx, metav1.ListOptions{ LabelSelector: "cluster.x-k8s.io/cluster-name=" + clusterName, }) if err != nil { return nil, fmt.Errorf("failed to list %s: %w", gvr.Resource, err) } return resources.Items, nil @@ -309,6 +210,8 @@ func (kc *KubeClient) listResource( // ListMachines returns a list of Machine resources for the given cluster. func (kc *KubeClient) ListMachines(ctx context.Context, clusterName string) ([]unstructured.Unstructured, error) { + GinkgoHelper() + return kc.listResource(ctx, schema.GroupVersionResource{ Group: "cluster.x-k8s.io", Version: "v1beta1", @@ -320,6 +223,8 @@ func (kc *KubeClient) ListMachines(ctx context.Context, clusterName string) ([]u // given cluster.
func (kc *KubeClient) ListMachineDeployments( ctx context.Context, clusterName string) ([]unstructured.Unstructured, error) { + GinkgoHelper() + return kc.listResource(ctx, schema.GroupVersionResource{ Group: "cluster.x-k8s.io", Version: "v1beta1", @@ -329,6 +234,8 @@ func (kc *KubeClient) ListMachineDeployments( func (kc *KubeClient) ListK0sControlPlanes( ctx context.Context, clusterName string) ([]unstructured.Unstructured, error) { + GinkgoHelper() + return kc.listResource(ctx, schema.GroupVersionResource{ Group: "controlplane.cluster.x-k8s.io", Version: "v1beta1", diff --git a/test/managedcluster/aws/aws.go b/test/managedcluster/aws/aws.go new file mode 100644 index 000000000..11cd276ba --- /dev/null +++ b/test/managedcluster/aws/aws.go @@ -0,0 +1,137 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains specific helpers for testing a managed cluster +// that uses the AWS infrastructure provider. +package aws + +import ( + "context" + "encoding/json" + "os" + "os/exec" + + corev1 "k8s.io/api/core/v1" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/dynamic" + + "github.com/Mirantis/hmc/test/kubeclient" + "github.com/Mirantis/hmc/test/managedcluster" + "github.com/Mirantis/hmc/test/utils" +) + +// CreateCredentialSecret uses clusterawsadm to encode existing AWS +// credentials and create a secret in the given namespace if one does not +// already exist. +func CreateCredentialSecret(ctx context.Context, kc *kubeclient.KubeClient) { + GinkgoHelper() + + _, err := kc.Client.CoreV1().Secrets(kc.Namespace). + Get(ctx, managedcluster.AWSCredentialsSecretName, metav1.GetOptions{}) + if !apierrors.IsNotFound(err) { + Expect(err).NotTo(HaveOccurred(), "failed to get AWS credentials secret") + return + } + + cmd := exec.Command("./bin/clusterawsadm", "bootstrap", "credentials", "encode-as-profile") + output, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "failed to encode AWS credentials with clusterawsadm") + + _, err = kc.Client.CoreV1().Secrets(kc.Namespace).Create(ctx, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: managedcluster.AWSCredentialsSecretName, + }, + Data: map[string][]byte{ + "AWS_B64ENCODED_CREDENTIALS": output, + }, + Type: corev1.SecretTypeOpaque, + }, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred(), "failed to create AWS credentials secret") +} + +// PopulateHostedTemplateVars populates the environment variables required for +// the AWS hosted CP template by querying the standalone CP cluster with the +// given kubeclient. 
+func PopulateHostedTemplateVars(ctx context.Context, kc *kubeclient.KubeClient) { + GinkgoHelper() + + c := getAWSClusterClient(kc) + awsCluster, err := c.Get(ctx, os.Getenv(managedcluster.EnvVarManagedClusterName), metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred(), "failed to get AWS cluster") + + vpcID, found, err := unstructured.NestedString(awsCluster.Object, "spec", "network", "vpc", "id") + Expect(err).NotTo(HaveOccurred(), "failed to get AWS cluster VPC ID") + Expect(found).To(BeTrue(), "AWS cluster has no VPC ID") + + subnets, found, err := unstructured.NestedSlice(awsCluster.Object, "spec", "network", "subnets") + Expect(err).NotTo(HaveOccurred(), "failed to get AWS cluster subnets") + Expect(found).To(BeTrue(), "AWS cluster has no subnets") + + subnet, ok := subnets[0].(map[string]interface{}) + Expect(ok).To(BeTrue(), "failed to cast subnet to map") + + subnetID, ok := subnet["resourceID"].(string) + Expect(ok).To(BeTrue(), "failed to cast subnet ID to string") + + subnetAZ, ok := subnet["availabilityZone"].(string) + Expect(ok).To(BeTrue(), "failed to cast subnet availability zone to string") + + securityGroupID, found, err := unstructured.NestedString( + awsCluster.Object, "status", "networkStatus", "securityGroups", "node", "id") + Expect(err).NotTo(HaveOccurred(), "failed to get AWS cluster security group ID") + Expect(found).To(BeTrue(), "AWS cluster has no security group ID") + + GinkgoT().Setenv(managedcluster.EnvVarAWSVPCID, vpcID) + GinkgoT().Setenv(managedcluster.EnvVarAWSSubnetID, subnetID) + GinkgoT().Setenv(managedcluster.EnvVarAWSSubnetAvailabilityZone, subnetAZ) + GinkgoT().Setenv(managedcluster.EnvVarAWSSecurityGroupID, securityGroupID) +} + +func PatchAWSClusterReady(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + GinkgoHelper() + + c := getAWSClusterClient(kc) + + trueStatus := map[string]interface{}{ + "status": map[string]interface{}{ + "ready": true, + }, + } + + patchBytes, err := 
json.Marshal(trueStatus) + Expect(err).NotTo(HaveOccurred(), "failed to marshal patch bytes") + + _, err = c.Patch(ctx, clusterName, types.MergePatchType, + patchBytes, metav1.PatchOptions{}, "status") + if err != nil { + return err + } + + return nil +} + +func getAWSClusterClient(kc *kubeclient.KubeClient) dynamic.ResourceInterface { + return kc.GetDynamicClient(schema.GroupVersionResource{ + Group: "infrastructure.cluster.x-k8s.io", + Version: "v1beta2", + Resource: "awsclusters", + }) +} diff --git a/test/managedcluster/constants.go b/test/managedcluster/constants.go new file mode 100644 index 000000000..cd43527fb --- /dev/null +++ b/test/managedcluster/constants.go @@ -0,0 +1,37 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package managedcluster + +const ( + // Common + EnvVarManagedClusterName = "MANAGED_CLUSTER_NAME" + EnvVarHostedManagedClusterName = "HOSTED_MANAGED_CLUSTER_NAME" + EnvVarInstallBeachHeadServices = "INSTALL_BEACH_HEAD_SERVICES" + EnvVarControlPlaneNumber = "CONTROL_PLANE_NUMBER" + EnvVarWorkerNumber = "WORKER_NUMBER" + EnvVarNamespace = "NAMESPACE" + // EnvVarNoCleanup disables After* cleanup in provider specs to allow for + // debugging of test failures. 
+ EnvVarNoCleanup = "NO_CLEANUP" + + // AWS + EnvVarAWSVPCID = "AWS_VPC_ID" + EnvVarAWSSubnetID = "AWS_SUBNET_ID" + EnvVarAWSSubnetAvailabilityZone = "AWS_SUBNET_AVAILABILITY_ZONE" + EnvVarAWSInstanceType = "AWS_INSTANCE_TYPE" + EnvVarAWSSecurityGroupID = "AWS_SG_ID" + EnvVarPublicIP = "AWS_PUBLIC_IP" + AWSCredentialsSecretName = "aws-variables" +) diff --git a/test/managedcluster/managedcluster.go b/test/managedcluster/managedcluster.go index af28952e3..2a08b53e7 100644 --- a/test/managedcluster/managedcluster.go +++ b/test/managedcluster/managedcluster.go @@ -18,6 +18,7 @@ import ( _ "embed" "fmt" "os" + "strings" "github.com/a8m/envsubst" "github.com/google/uuid" @@ -68,23 +69,43 @@ func GetProviderLabel(provider ProviderType) string { func GetUnstructured(templateName Template) *unstructured.Unstructured { GinkgoHelper() - generatedName := uuid.New().String()[:8] + "-e2e-test" - _, _ = fmt.Fprintf(GinkgoWriter, "Generated cluster name: %q\n", generatedName) + generatedName := os.Getenv(EnvVarManagedClusterName) + if generatedName == "" { + generatedName = uuid.New().String()[:8] + "-e2e-test" + _, _ = fmt.Fprintf(GinkgoWriter, "Generated cluster name: %q\n", generatedName) + GinkgoT().Setenv(EnvVarManagedClusterName, generatedName) + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "Using configured cluster name: %q\n", generatedName) + } - Expect(os.Setenv("MANAGED_CLUSTER_NAME", generatedName)).NotTo(HaveOccurred()) + var hostedName string + if strings.Contains(string(templateName), "-hosted") { + hostedName = generatedName + "-hosted" + GinkgoT().Setenv(EnvVarHostedManagedClusterName, hostedName) + _, _ = fmt.Fprintf(GinkgoWriter, "Creating hosted ManagedCluster with name: %q\n", hostedName) + } var managedClusterTemplateBytes []byte switch templateName { case TemplateAWSStandaloneCP: managedClusterTemplateBytes = awsStandaloneCPManagedClusterTemplateBytes case TemplateAWSHostedCP: + // Validate environment vars that do not have defaults are populated. 
+ // We perform this validation here instead of within a Before block + // since we populate the vars from standalone prior to this step. + ValidateDeploymentVars([]string{ + EnvVarAWSVPCID, + EnvVarAWSSubnetID, + EnvVarAWSSubnetAvailabilityZone, + EnvVarAWSSecurityGroupID, + }) managedClusterTemplateBytes = awsHostedCPManagedClusterTemplateBytes case TemplateVSphereStandaloneCP: managedClusterTemplateBytes = vsphereStandaloneCPManagedClusterTemplateBytes case TemplateVSphereHostedCP: managedClusterTemplateBytes = vsphereHostedCPManagedClusterTemplateBytes default: - Fail(fmt.Sprintf("unsupported template type: %s", templateName)) + Fail(fmt.Sprintf("unsupported AWS template: %s", templateName)) } managedClusterConfigBytes, err := envsubst.Bytes(managedClusterTemplateBytes) @@ -97,3 +118,11 @@ func GetUnstructured(templateName Template) *unstructured.Unstructured { return &unstructured.Unstructured{Object: managedClusterConfig} } + +func ValidateDeploymentVars(v []string) { + GinkgoHelper() + + for _, envVar := range v { + Expect(os.Getenv(envVar)).NotTo(BeEmpty(), envVar+" must be set") + } +} diff --git a/test/managedcluster/providervalidator.go b/test/managedcluster/providervalidator.go new file mode 100644 index 000000000..fd474253c --- /dev/null +++ b/test/managedcluster/providervalidator.go @@ -0,0 +1,113 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package managedcluster + +import ( + "context" + "fmt" + + "github.com/Mirantis/hmc/test/kubeclient" + . "github.com/onsi/ginkgo/v2" +) + +// ProviderValidator is a struct that contains the necessary information to +// validate a provider's resources. Some providers do not support all of the +// resources that can potentially be validated. +type ProviderValidator struct { + // Template is the name of the template being validated. + template Template + // ClusterName is the name of the cluster to validate. + clusterName string + // ResourcesToValidate is a map of resource names to their validation + // function. + resourcesToValidate map[string]resourceValidationFunc + // ResourceOrder is a slice of resource names that determines the order in + // which resources are validated. + resourceOrder []string +} + +type ValidationAction string + +const ( + ValidationActionDeploy ValidationAction = "deploy" + ValidationActionDelete ValidationAction = "delete" +) + +func NewProviderValidator(template Template, clusterName string, action ValidationAction) *ProviderValidator { + var ( + resourcesToValidate map[string]resourceValidationFunc + resourceOrder []string + ) + + if action == ValidationActionDeploy { + resourcesToValidate = map[string]resourceValidationFunc{ + "clusters": validateCluster, + "machines": validateMachines, + "control-planes": validateK0sControlPlanes, + "csi-driver": validateCSIDriver, + } + resourceOrder = []string{"clusters", "machines", "control-planes", "csi-driver"} + + switch template { + case TemplateAWSStandaloneCP, TemplateAWSHostedCP: + resourcesToValidate["ccm"] = validateCCM + resourceOrder = append(resourceOrder, "ccm") + } + } else { + resourcesToValidate = map[string]resourceValidationFunc{ + "clusters": validateClusterDeleted, + "machinedeployments": validateMachineDeploymentsDeleted, + "control-planes": validateK0sControlPlanesDeleted, + } + resourceOrder = []string{"clusters", "machinedeployments", "control-planes"} + } + + return 
&ProviderValidator{ + template: template, + clusterName: clusterName, + resourcesToValidate: resourcesToValidate, + resourceOrder: resourceOrder, + } +} + +// Validate is a provider-agnostic verification that checks for +// a specific set of resources and either validates their readiness or +// their deletion depending on the passed map of resourceValidationFuncs and +// desired order. +// It is meant to be used in conjunction with an Eventually block. +// In some cases it may be necessary to end the Eventually block early if the +// resource will never reach a ready state, in these instances Ginkgo's Fail +// should be used to end the spec early. +func (p *ProviderValidator) Validate(ctx context.Context, kc *kubeclient.KubeClient) error { + // Sequentially validate each resource type, only returning the first error + // as to not move on to the next resource type until the first is resolved. + // We use []string here since order is important. + for _, name := range p.resourceOrder { + validator, ok := p.resourcesToValidate[name] + if !ok { + continue + } + + if err := validator(ctx, kc, p.clusterName); err != nil { + _, _ = fmt.Fprintf(GinkgoWriter, "[%s/%s] validation error: %v\n", p.template, name, err) + return err + } + + _, _ = fmt.Fprintf(GinkgoWriter, "[%s/%s] validation succeeded\n", p.template, name) + delete(p.resourcesToValidate, name) + } + + return nil +} diff --git a/test/managedcluster/resources/aws-hosted-cp.yaml.tpl b/test/managedcluster/resources/aws-hosted-cp.yaml.tpl index 894bb6667..06a4cf4cc 100644 --- a/test/managedcluster/resources/aws-hosted-cp.yaml.tpl +++ b/test/managedcluster/resources/aws-hosted-cp.yaml.tpl @@ -1,16 +1,15 @@ apiVersion: hmc.mirantis.com/v1alpha1 kind: ManagedCluster metadata: - name: ${MANAGED_CLUSTER_NAME} + name: ${HOSTED_MANAGED_CLUSTER_NAME} spec: template: aws-hosted-cp config: vpcID: ${AWS_VPC_ID} region: ${AWS_REGION} - publicIP: ${PUBLIC_IP:=true} subnets: - id: ${AWS_SUBNET_ID} availabilityZone: 
${AWS_SUBNET_AVAILABILITY_ZONE} - instanceType: ${INSTANCE_TYPE:=t3.medium} + instanceType: ${AWS_INSTANCE_TYPE:=t3.medium} securityGroupIDs: - ${AWS_SG_ID} diff --git a/test/managedcluster/resources/aws-standalone-cp.yaml.tpl b/test/managedcluster/resources/aws-standalone-cp.yaml.tpl index 7825a2833..0d107ca43 100644 --- a/test/managedcluster/resources/aws-standalone-cp.yaml.tpl +++ b/test/managedcluster/resources/aws-standalone-cp.yaml.tpl @@ -6,12 +6,13 @@ spec: template: aws-standalone-cp config: region: ${AWS_REGION} - publicIP: ${PUBLIC_IP:=true} + publicIP: ${AWS_PUBLIC_IP:=true} controlPlaneNumber: ${CONTROL_PLANE_NUMBER:=1} workersNumber: ${WORKERS_NUMBER:=1} controlPlane: - instanceType: ${INSTANCE_TYPE:=t3.small} + instanceType: ${AWS_INSTANCE_TYPE:=t3.small} worker: - instanceType: ${INSTANCE_TYPE:=t3.small} + instanceType: ${AWS_INSTANCE_TYPE:=t3.small} + installBeachHeadServices: ${INSTALL_BEACH_HEAD_SERVICES:=true} diff --git a/test/managedcluster/validate_deleted.go b/test/managedcluster/validate_deleted.go index 8305fe62c..c7999e85e 100644 --- a/test/managedcluster/validate_deleted.go +++ b/test/managedcluster/validate_deleted.go @@ -24,20 +24,6 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) -var deletionValidators = map[string]resourceValidationFunc{ - "clusters": validateClusterDeleted, - "machinedeployments": validateMachineDeploymentsDeleted, - "control-planes": validateK0sControlPlanesDeleted, -} - -// VerifyProviderDeleted is a provider-agnostic verification that checks -// to ensure generic resources managed by the provider have been deleted. -// It is intended to be used in conjunction with an Eventually block. 
-func VerifyProviderDeleted(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { - return verifyProviderAction(ctx, kc, clusterName, deletionValidators, - []string{"clusters", "machinedeployments", "control-planes"}) -} - // validateClusterDeleted validates that the Cluster resource has been deleted. func validateClusterDeleted(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { // Validate that the Cluster resource has been deleted @@ -53,7 +39,7 @@ func validateClusterDeleted(ctx context.Context, kc *kubeclient.KubeClient, clus // like this, we probably don't want to wait the full Eventually // for something like this, but we can't immediately fail the test // either. - return fmt.Errorf("cluster %q exists, but is not in 'Deleting' phase", clusterName) + return fmt.Errorf("cluster: %q exists, but is not in 'Deleting' phase", clusterName) } conditions, err := utils.GetConditionsFromUnstructured(cluster) diff --git a/test/managedcluster/validate_deployed.go b/test/managedcluster/validate_deployed.go index b327d4f6f..b80d84204 100644 --- a/test/managedcluster/validate_deployed.go +++ b/test/managedcluster/validate_deployed.go @@ -34,61 +34,6 @@ import ( // resource. type resourceValidationFunc func(context.Context, *kubeclient.KubeClient, string) error -var resourceValidators = map[string]resourceValidationFunc{ - "clusters": validateCluster, - "machines": validateMachines, - "control-planes": validateK0sControlPlanes, - "csi-driver": validateCSIDriver, - "ccm": validateCCM, -} - -// VerifyProviderDeployed is a provider-agnostic verification that checks -// to ensure generic resources managed by the provider have been deleted. -// It is intended to be used in conjunction with an Eventually block. 
-func VerifyProviderDeployed( - ctx context.Context, kc *kubeclient.KubeClient, clusterName string, - providerType ProviderType) error { - if providerType == ProviderVSphere { - return verifyProviderAction(ctx, kc, clusterName, resourceValidators, - []string{"clusters", "machines", "control-planes", "csi-driver"}) - } else { - return verifyProviderAction(ctx, kc, clusterName, resourceValidators, - []string{"clusters", "machines", "control-planes", "csi-driver", "ccm"}) - } -} - -// verifyProviderAction is a provider-agnostic verification that checks for -// a specific set of resources and either validates their readiness or -// their deletion depending on the passed map of resourceValidationFuncs and -// desired order. -// It is meant to be used in conjunction with an Eventually block. -// In some cases it may be necessary to end the Eventually block early if the -// resource will never reach a ready state, in these instances Ginkgo's Fail -// should be used to end the spec early. -func verifyProviderAction( - ctx context.Context, kc *kubeclient.KubeClient, clusterName string, - resourcesToValidate map[string]resourceValidationFunc, order []string) error { - // Sequentially validate each resource type, only returning the first error - // as to not move on to the next resource type until the first is resolved. - // We use []string here since order is important. 
- for _, name := range order { - validator, ok := resourcesToValidate[name] - if !ok { - continue - } - - if err := validator(ctx, kc, clusterName); err != nil { - _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation error: %v\n", name, err) - return err - } - - _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation succeeded\n", name) - delete(resourcesToValidate, name) - } - - return nil -} - func validateCluster(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { cluster, err := kc.GetCluster(ctx, clusterName) if err != nil { @@ -118,7 +63,7 @@ func validateCluster(ctx context.Context, kc *kubeclient.KubeClient, clusterName func validateMachines(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { machines, err := kc.ListMachines(ctx, clusterName) if err != nil { - return fmt.Errorf("failed to list machines: %w", err) + return err } for _, machine := range machines { @@ -137,7 +82,7 @@ func validateMachines(ctx context.Context, kc *kubeclient.KubeClient, clusterNam func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { controlPlanes, err := kc.ListK0sControlPlanes(ctx, clusterName) if err != nil { - return fmt.Errorf("failed to list K0sControlPlanes: %w", err) + return err } for _, controlPlane := range controlPlanes { @@ -178,14 +123,11 @@ func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, cl // validateCSIDriver validates that the provider CSI driver is functioning // by creating a PVC and verifying it enters "Bound" status. 
func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { - clusterKC, err := kc.NewFromCluster(ctx, "default", clusterName) - if err != nil { - Fail(fmt.Sprintf("failed to create KubeClient for managed cluster %s: %v", clusterName, err)) - } + clusterKC := kc.NewFromCluster(ctx, "default", clusterName) pvcName := clusterName + "-csi-test-pvc" - _, err = clusterKC.Client.CoreV1().PersistentVolumeClaims(clusterKC.Namespace). + _, err := clusterKC.Client.CoreV1().PersistentVolumeClaims(clusterKC.Namespace). Create(ctx, &corev1.PersistentVolumeClaim{ ObjectMeta: metav1.ObjectMeta{ Name: pvcName, @@ -268,14 +210,11 @@ func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterNa // functional by creating a LoadBalancer service and verifying it is assigned // an external IP. func validateCCM(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { - clusterKC, err := kc.NewFromCluster(ctx, "default", clusterName) - if err != nil { - Fail(fmt.Sprintf("failed to create KubeClient for managed cluster %s: %v", clusterName, err)) - } + clusterKC := kc.NewFromCluster(ctx, "default", clusterName) createdServiceName := "loadbalancer-" + clusterName - _, err = clusterKC.Client.CoreV1().Services(clusterKC.Namespace).Create(ctx, &corev1.Service{ + _, err := clusterKC.Client.CoreV1().Services(clusterKC.Namespace).Create(ctx, &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: createdServiceName, }, diff --git a/test/managedcluster/vsphere/vsphere.go b/test/managedcluster/vsphere/vsphere.go new file mode 100644 index 000000000..4873dcdaf --- /dev/null +++ b/test/managedcluster/vsphere/vsphere.go @@ -0,0 +1,116 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vsphere + +import ( + "context" + "fmt" + "os" + + "github.com/Mirantis/hmc/test/kubeclient" + "github.com/Mirantis/hmc/test/managedcluster" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" +) + +func CreateSecret(kc *kubeclient.KubeClient, secretName string) error { + ctx := context.Background() + _, err := kc.Client.CoreV1().Secrets(kc.Namespace).Get(ctx, secretName, metav1.GetOptions{}) + + if !apierrors.IsNotFound(err) { + return nil + } + username := os.Getenv("VSPHERE_USER") + password := os.Getenv("VSPHERE_PASSWORD") + + _, err = kc.Client.CoreV1().Secrets(kc.Namespace).Create(ctx, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + }, + StringData: map[string]string{ + "username": username, + "password": password, + }, + Type: corev1.SecretTypeOpaque, + }, metav1.CreateOptions{}) + + if err != nil { + return fmt.Errorf("failed to create vSphere credentials secret: %w", err) + } + + return nil +} + +func CreateClusterIdentity(kc *kubeclient.KubeClient, secretName string, identityName string) error { + ctx := context.Background() + client, err := dynamic.NewForConfig(kc.Config) + + if err != nil { + return fmt.Errorf("failed to create dynamic client: %w", err) + } + + gvr := schema.GroupVersionResource{ + Group: "infrastructure.cluster.x-k8s.io", + Version: "v1beta1", + Resource: "vsphereclusteridentities", + } + 
+ clusterIdentity := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "kind": "VSphereClusterIdentity", + "metadata": map[string]interface{}{ + "name": identityName, + }, + "spec": map[string]interface{}{ + "secretName": secretName, + "allowedNamespaces": map[string]interface{}{ + "selector": map[string]interface{}{ + "matchLabels": map[string]interface{}{}, + }, + }, + }, + }, + } + + result, err := client.Resource(gvr).Create(ctx, clusterIdentity, metav1.CreateOptions{}) + if err != nil { + fmt.Printf("%+v", result) + return fmt.Errorf("Failed to create vsphereclusteridentity: %w", err) + } + + return nil +} + +func CheckEnv() { + managedcluster.ValidateDeploymentVars([]string{ + "VSPHERE_USER", + "VSPHERE_PASSWORD", + "VSPHERE_SERVER", + "VSPHERE_THUMBPRINT", + "VSPHERE_DATACENTER", + "VSPHERE_DATASTORE", + "VSPHERE_RESOURCEPOOL", + "VSPHERE_FOLDER", + "VSPHERE_CONTROL_PLANE_ENDPOINT", + "VSPHERE_VM_TEMPLATE", + "VSPHERE_NETWORK", + "VSPHERE_SSH_KEY", + }) +} diff --git a/test/utils/utils.go b/test/utils/utils.go index 4e0d767f4..de714b0fe 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -27,8 +27,31 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) -// Run executes the provided command within this context +// Run executes the provided command within this context and returns it's +// output. Run does not wait for the command to finish, use Wait instead. 
func Run(cmd *exec.Cmd) ([]byte, error) { + command := prepareCmd(cmd) + _, _ = fmt.Fprintf(GinkgoWriter, "running: %s\n", command) + + output, err := cmd.Output() + if err != nil { + return nil, handleCmdError(err, command) + } + + return output, nil +} + +func handleCmdError(err error, command string) error { + var exitError *exec.ExitError + + if errors.As(err, &exitError) { + return fmt.Errorf("%s failed with error: (%v): %s", command, err, string(exitError.Stderr)) + } + + return fmt.Errorf("%s failed with error: %w", command, err) +} + +func prepareCmd(cmd *exec.Cmd) string { dir, _ := GetProjectDir() cmd.Dir = dir @@ -37,19 +60,7 @@ func Run(cmd *exec.Cmd) ([]byte, error) { } cmd.Env = append(os.Environ(), "GO111MODULE=on") - command := strings.Join(cmd.Args, " ") - _, _ = fmt.Fprintf(GinkgoWriter, "running: %s\n", command) - - output, err := cmd.Output() - if err != nil { - var exitError *exec.ExitError - - if errors.As(err, &exitError) { - return output, fmt.Errorf("%s failed with error: (%v): %s", command, err, string(exitError.Stderr)) - } - } - - return output, nil + return strings.Join(cmd.Args, " ") } // LoadImageToKindCluster loads a local docker image to the kind cluster diff --git a/test/utils/vsphere/vsphere.go b/test/utils/vsphere/vsphere.go deleted file mode 100644 index d9ffcef14..000000000 --- a/test/utils/vsphere/vsphere.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2024 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package vsphereutils - -import ( - "fmt" - "os" -) - -func CheckEnv() error { - envVars := []string{ - "VSPHERE_USER", - "VSPHERE_PASSWORD", - "VSPHERE_SERVER", - "VSPHERE_THUMBPRINT", - "VSPHERE_DATACENTER", - "VSPHERE_DATASTORE", - "VSPHERE_RESOURCEPOOL", - "VSPHERE_FOLDER", - "VSPHERE_CONTROL_PLANE_ENDPOINT", - "VSPHERE_VM_TEMPLATE", - "VSPHERE_NETWORK", - "VSPHERE_SSH_KEY", - } - for _, envVar := range envVars { - _, exists := os.LookupEnv(envVar) - if !exists { - return fmt.Errorf("Variable %s isn't set", envVar) - } - } - return nil -}