Fix various issues with hosted test, move resource nuke outside of test
Signed-off-by: Kyle Squizzato <[email protected]>
squizzi committed Sep 10, 2024
1 parent 1d5bbe0 commit 9ebce9c
Showing 8 changed files with 108 additions and 109 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/test.yml
@@ -40,6 +40,7 @@ jobs:
id: vars
run: |
echo "version=$(git describe --tags --always)" >> $GITHUB_OUTPUT
echo "clustername=$(date +%s)-e2e-test" >> $GITHUB_OUTPUT
- name: Build and push HMC controller image
uses: docker/build-push-action@v6
with:
@@ -62,14 +63,22 @@ jobs:
env:
REGISTRY_REPO: 'oci://ghcr.io/mirantis/hmc/charts-ci'
IMG: 'ghcr.io/mirantis/hmc/controller-ci:${{ steps.vars.outputs.version }}'
MANAGED_CLUSTER_NAME: '${{ steps.vars.outputs.clustername }}'
AWS_REGION: us-west-2
AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_SECRET_ACCESS_KEY }}
run: |
make test-e2e
- name: Archive test results
if: failure()
uses: actions/upload-artifact@v4
with:
name: test-logs
path: |
test/e2e/*.log
- name: Cleanup test resources
if: always()
env:
CLUSTER_NAME: '${{ steps.vars.outputs.clustername }}'
run: |
make dev-aws-nuke
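
Taken together, these workflow changes generate one timestamp-based cluster name, thread it through the test run, and guarantee cleanup even on failure via "if: always()". A rough shell equivalent of the flow (sample timestamp illustrative):

    CLUSTERNAME="$(date +%s)-e2e-test"              # e.g. 1725900000-e2e-test
    MANAGED_CLUSTER_NAME="$CLUSTERNAME" make test-e2e || status=$?
    CLUSTER_NAME="$CLUSTERNAME" make dev-aws-nuke   # runs whether the tests passed or not
    exit "${status:-0}"
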
4 changes: 2 additions & 2 deletions Makefile
@@ -319,11 +319,11 @@ dev-creds-apply: dev-$(DEV_PROVIDER)-creds

.PHONY: envsubst awscli yq dev-aws-nuke
dev-aws-nuke: ## Warning: Destructive! Nuke all AWS resources deployed by 'DEV_PROVIDER=aws dev-provider-apply', prefix with CLUSTER_NAME to nuke a specific cluster.
@CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) AWSCLI=$(AWSCLI) bash -c ./scripts/aws-nuke-ccm.sh
@CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) AWSCLI=$(AWSCLI) bash -c "./scripts/aws-nuke-ccm.sh elb"
@CLUSTER_NAME=$(CLUSTER_NAME) $(ENVSUBST) < config/dev/cloud_nuke.yaml.tpl > config/dev/cloud_nuke.yaml
DISABLE_TELEMETRY=true $(CLOUDNUKE) aws --region $$AWS_REGION --force --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2,ec2-subnet,elb,elbv2,ebs,internet-gateway,network-interface,security-group
@rm config/dev/cloud_nuke.yaml
@CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) AWSCLI=$(AWSCLI) bash -c ./scripts/aws-nuke-ccm.sh
@CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) AWSCLI=$(AWSCLI) bash -c "./scripts/aws-nuke-ccm.sh ebs"

.PHONY: test-apply
test-apply: kind-deploy dev-deploy dev-templates
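
The reworked target now brackets cloud-nuke between the script's two phases, so load balancers created for the cluster are removed before the bulk sweep and leftover volumes after it. A hypothetical manual run (cluster name illustrative):

    export AWS_REGION=us-west-2
    CLUSTER_NAME=1725900000-e2e-test make dev-aws-nuke
    # which executes, in order:
    #   ./scripts/aws-nuke-ccm.sh elb     # delete ELBs tagged with the cluster name
    #   cloud-nuke aws ... --config config/dev/cloud_nuke.yaml
    #   ./scripts/aws-nuke-ccm.sh ebs     # delete volumes whose PVC tag matches
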
50 changes: 26 additions & 24 deletions scripts/aws-nuke-ccm.sh
@@ -33,28 +33,30 @@ if [ -z $AWSCLI ]; then
exit 1
fi

echo "Checking for ELB with '$CLUSTER_NAME' tag"
for LOADBALANCER in $($AWSCLI elb describe-load-balancers --output yaml | $YQ '.LoadBalancerDescriptions[].LoadBalancerName');
do
echo "Checking ELB: $LOADBALANCER for tag"
DESCRIBE_TAGS=$($AWSCLI elb describe-tags \
--load-balancer-names $LOADBALANCER \
--output yaml | $YQ '.TagDescriptions[]' | grep '$CLUSTER_NAME')
if [[ $DESCRIBE_TAGS ]]; then
echo "Deleting ELB: $LOADBALANCER"
$AWSCLI elb delete-load-balancer --load-balancer-name $LOADBALANCER
fi
done
if [ "$1" == "elb" ]; then
echo "Checking for ELB with '$CLUSTER_NAME' tag"
for LOADBALANCER in $($AWSCLI elb describe-load-balancers --output yaml | $YQ '.LoadBalancerDescriptions[].LoadBalancerName');
do
echo "Checking ELB: $LOADBALANCER for tag"
DESCRIBE_TAGS=$($AWSCLI elb describe-tags --load-balancer-names $LOADBALANCER --output yaml | $YQ '.TagDescriptions[]' | grep $CLUSTER_NAME)
if [ ! -z "${DESCRIBE_TAGS}" ]; then
echo "Deleting ELB: $LOADBALANCER"
$AWSCLI elb delete-load-balancer --load-balancer-name $LOADBALANCER
fi
done
fi

echo "Checking for EBS Volumes with '$CLUSTER_NAME' within the 'kubernetes.io/created-for/pvc/name' tag"
for VOLUME in $($AWSCLI ec2 describe-volumes --output yaml | $YQ '.Volumes[].VolumeId');
do
echo "Checking EBS Volume: $VOLUME for $CLUSTER_NAME claim"
DESCRIBE_VOLUMES=$($AWSCLI ec2 describe-volumes \
--volume-id $VOLUME \
--output yaml | $YQ '.Volumes | to_entries[] | .value.Tags[] | select(.Key == "kubernetes.io/created-for/pvc/name")' | grep $CLUSTER_NAME)
if [[! $DESCRIBE_VOLUMES ]]; then
echo "Deleting EBS Volume: $VOLUME"
$AWSCLI ec2 delete-volume --volume-id $VOLUME
fi
done
if [ "$1" == "ebs" ]; then
echo "Checking for EBS Volumes with '$CLUSTER_NAME' within the 'kubernetes.io/created-for/pvc/name' tag"
for VOLUME in $($AWSCLI ec2 describe-volumes --output yaml | $YQ '.Volumes[].VolumeId');
do
echo "Checking EBS Volume: $VOLUME for $CLUSTER_NAME claim"
DESCRIBE_VOLUMES=$($AWSCLI ec2 describe-volumes \
--volume-id $VOLUME \
--output yaml | $YQ '.Volumes | to_entries[] | .value.Tags[] | select(.Key == "kubernetes.io/created-for/pvc/name")' | grep $CLUSTER_NAME)
if [ ! -z "${DESCRIBE_TAGS}" ]; then
echo "Deleting EBS Volume: $VOLUME"
$AWSCLI ec2 delete-volume --volume-id $VOLUME
fi
done
fi
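
With the new guard clauses the script only acts when its first argument selects a phase, which lets the Makefile sequence the two passes around cloud-nuke. Direct invocations would look roughly like this (tool names assumed to be on PATH):

    CLUSTER_NAME=my-cluster YQ=yq AWSCLI=aws ./scripts/aws-nuke-ccm.sh elb   # ELB pass only
    CLUSTER_NAME=my-cluster YQ=yq AWSCLI=aws ./scripts/aws-nuke-ccm.sh ebs   # EBS pass only
    # with no argument the script now deletes nothing
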
98 changes: 47 additions & 51 deletions test/e2e/e2e_test.go
@@ -47,12 +47,12 @@ var _ = Describe("controller", Ordered, func() {
Expect(err).NotTo(HaveOccurred())
})

// AfterAll(func() {
// By("removing the controller-manager")
// cmd := exec.Command("make", "test-destroy")
// _, err := utils.Run(cmd)
// Expect(err).NotTo(HaveOccurred())
// })
AfterAll(func() {
By("removing the controller-manager")
cmd := exec.Command("make", "test-destroy")
_, err := utils.Run(cmd)
Expect(err).NotTo(HaveOccurred())
})

Context("Operator", func() {
It("should run successfully", func() {
@@ -74,10 +74,11 @@ var _ = Describe("controller", Ordered, func() {
Describe("AWS Templates", Label("provider"), func() {
var (
kc *kubeclient.KubeClient
standaloneClient *kubeclient.KubeClient
standaloneDeleteFunc func() error
hostedDeleteFunc func() error
//kubecfgDeleteFunc func() error
clusterName string
kubecfgDeleteFunc func() error
clusterName string
)

BeforeAll(func() {
@@ -86,39 +87,31 @@
aws.CreateCredentialSecret(context.Background(), kc)
})

// AfterEach(func() {
// // If we failed collect logs from each of the affiliated controllers
// // as well as the output of clusterctl to store as artifacts.
// if CurrentSpecReport().Failed() {
// By("collecting failure logs from controllers")
// collectLogArtifacts(kc, clusterName, managedcluster.ProviderAWS, managedcluster.ProviderCAPI)

// By("deleting resources after failure")
// for _, deleteFunc := range []func() error{
// kubecfgDeleteFunc,
// hostedDeleteFunc,
// standaloneDeleteFunc,
// } {
// if deleteFunc != nil {
// err := deleteFunc()
// Expect(err).NotTo(HaveOccurred())
// }
// }
// }

// // Purge the AWS resources, the AfterAll for the controller will
// // clean up the management cluster.
// By("nuking remaining AWS resources")
// if clusterName != "" {
// // The nuke config will identify resources based on cluster name
// // so it should nuke hosted and standalone resources since they
// // are based on the same cluster name.
// GinkgoT().Setenv("CLUSTER_NAME", clusterName)
// cmd := exec.Command("make", "dev-aws-nuke")
// err := utils.Wait(cmd)
// ExpectWithOffset(2, err).NotTo(HaveOccurred())
// }
// })
AfterEach(func() {
// If we failed, collect logs from each of the affiliated controllers
// as well as the output of clusterctl to store as artifacts.
if CurrentSpecReport().Failed() {
By("collecting failure logs from controllers")
if kc != nil {
collectLogArtifacts(kc, clusterName, managedcluster.ProviderAWS, managedcluster.ProviderCAPI)
}
if standaloneClient != nil {
collectLogArtifacts(standaloneClient, clusterName, managedcluster.ProviderAWS, managedcluster.ProviderCAPI)
}

By("deleting resources after failure")
for _, deleteFunc := range []func() error{
kubecfgDeleteFunc,
hostedDeleteFunc,
standaloneDeleteFunc,
} {
if deleteFunc != nil {
err := deleteFunc()
Expect(err).NotTo(HaveOccurred())
}
}
}
})

It("should work with an AWS provider", func() {
// Deploy a standalone cluster and verify it is running/ready.
@@ -136,7 +129,7 @@ var _ = Describe("controller", Ordered, func() {
return managedcluster.VerifyProviderDeployed(context.Background(), kc, clusterName)
}).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed())

templateBy(managedcluster.TemplateAWSHostedCP, "installing controller and templates")
templateBy(managedcluster.TemplateAWSHostedCP, "installing controller and templates on standalone cluster")

// Download the KUBECONFIG for the standalone cluster and load it
// so we can call Make targets against this cluster.
@@ -156,13 +149,17 @@ var _ = Describe("controller", Ordered, func() {
Expect(err).NotTo(HaveOccurred())
Expect(os.Unsetenv("KUBECONFIG")).To(Succeed())

// Ensure AWS credentials are set in the standalone cluster.
standaloneClient = kc.NewFromCluster(context.Background(), namespace, clusterName)
aws.CreateCredentialSecret(context.Background(), standaloneClient)

templateBy(managedcluster.TemplateAWSHostedCP, "validating that the controller is ready")
Eventually(func() error {
err := verifyControllersUp(kc)
err := verifyControllersUp(standaloneClient)
if err != nil {
_, _ = fmt.Fprintf(
GinkgoWriter, "[%s] controller validation failed: %v\n",
string(managedcluster.TemplateAWSStandaloneCP), err)
string(managedcluster.TemplateAWSHostedCP), err)
return err
}
return nil
@@ -176,16 +173,15 @@ var _ = Describe("controller", Ordered, func() {
hd := managedcluster.GetUnstructured(managedcluster.ProviderAWS, managedcluster.TemplateAWSHostedCP)
hdName := hd.GetName()

// Ensure AWS credentials are set in the standalone cluster.
standaloneClient := kc.NewFromCluster(context.Background(), namespace, clusterName)
aws.CreateCredentialSecret(context.Background(), standaloneClient)

// Deploy the hosted cluster on top of the standalone cluster.
hostedDeleteFunc = standaloneClient.CreateManagedCluster(context.Background(), hd)

// Patch the AWSCluster resource as Ready, see:
// https://docs.k0smotron.io/stable/capi-aws/#prepare-the-aws-infra-provider
aws.PatchAWSClusterReady(context.Background(), standaloneClient, hd.GetName())
// Use Eventually as the AWSCluster might not be available
// immediately.
Eventually(func() error {
	return aws.PatchAWSClusterReady(context.Background(), standaloneClient, hd.GetName())
}).WithTimeout(time.Minute).WithPolling(time.Second).Should(Succeed())

// Verify the hosted cluster is running/ready.
templateBy(managedcluster.TemplateAWSHostedCP, "waiting for infrastructure to deploy successfully")
@@ -249,7 +245,7 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider
}
defer podLogs.Close() //nolint:errcheck

output, err := os.Create(fmt.Sprintf("./test/e2e/%s.log", pod.Name))
output, err := os.Create(fmt.Sprintf("./test/e2e/%s.log", kc.Config.Username+"-"+pod.Name))
if err != nil {
utils.WarnError(fmt.Errorf("failed to create log file for pod %s: %w", pod.Name, err))
continue
@@ -272,7 +268,7 @@
return
}

err = os.WriteFile(filepath.Join("test/e2e", "clusterctl.log"), output, 0644)
err = os.WriteFile(filepath.Join("test/e2e", kc.Config.Username+"-"+"clusterctl.log"), output, 0644)
if err != nil {
utils.WarnError(fmt.Errorf("failed to write clusterctl log: %w", err))
}
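
Because the restored AfterEach collects logs from both the management and the standalone cluster into the same directory, the artifact names gain a per-client username prefix to keep them from colliding. After a failure the archive step would upload files shaped like this (names illustrative):

    ls test/e2e/
    #   <username>-hmc-controller-manager-<pod-hash>.log
    #   <username>-clusterctl.log
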
4 changes: 3 additions & 1 deletion test/kubeclient/kubeclient.go
@@ -161,7 +161,9 @@ func (kc *KubeClient) CreateManagedCluster(
})

_, err := client.Create(ctx, managedcluster, metav1.CreateOptions{})
Expect(err).NotTo(HaveOccurred(), "failed to create %s", kind)
if !apierrors.IsAlreadyExists(err) {
Expect(err).NotTo(HaveOccurred(), "failed to create %s", kind)
}

return func() error {
err := client.Delete(ctx, managedcluster.GetName(), metav1.DeleteOptions{})
28 changes: 21 additions & 7 deletions test/managedcluster/aws/aws.go
@@ -18,6 +18,7 @@ package aws

import (
"context"
"encoding/json"
"fmt"
"os"
"os/exec"
@@ -73,8 +74,6 @@ func CreateCredentialSecret(ctx context.Context, kc *kubeclient.KubeClient) {
func PopulateHostedTemplateVars(ctx context.Context, kc *kubeclient.KubeClient) {
GinkgoHelper()

fmt.Println(os.Getenv(managedcluster.EnvVarManagedClusterName))

c := getAWSClusterClient(kc)
awsCluster, err := c.Get(ctx, os.Getenv(managedcluster.EnvVarManagedClusterName), metav1.GetOptions{})
Expect(err).NotTo(HaveOccurred(), "failed to get AWS cluster")
@@ -90,7 +89,7 @@ func PopulateHostedTemplateVars(ctx context.Context, kc *kubeclient.KubeClient)
subnet, ok := subnets[0].(map[string]interface{})
Expect(ok).To(BeTrue(), "failed to cast subnet to map")

subnetID, ok := subnet["id"].(string)
subnetID, ok := subnet["resourceID"].(string)
Expect(ok).To(BeTrue(), "failed to cast subnet ID to string")

subnetAZ, ok := subnet["availabilityZone"].(string)
@@ -107,14 +106,29 @@
GinkgoT().Setenv(managedcluster.EnvVarAWSSecurityGroupID, securityGroupID)
}

func PatchAWSClusterReady(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) {
func PatchAWSClusterReady(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error {
GinkgoHelper()

_, _ = fmt.Fprintf(GinkgoWriter, "Patching AWS cluster %q to ready\n", clusterName)

c := getAWSClusterClient(kc)

_, err := c.Patch(ctx, clusterName, types.MergePatchType,
[]byte("status: {ready: true}"), metav1.PatchOptions{}, "status")
Expect(err).NotTo(HaveOccurred())
trueStatus := map[string]interface{}{
"status": map[string]interface{}{
"ready": true,
},
}

patchBytes, err := json.Marshal(trueStatus)
Expect(err).NotTo(HaveOccurred(), "failed to marshal patch bytes")

_, err = c.Patch(ctx, clusterName, types.MergePatchType,
patchBytes, metav1.PatchOptions{}, "status")
if err != nil {
return err
}

return nil
}

func getAWSClusterClient(kc *kubeclient.KubeClient) dynamic.ResourceInterface {
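
PatchAWSClusterReady now sends a well-formed JSON merge patch, {"status":{"ready":true}}, to the status subresource instead of the malformed YAML-like string it used before. The kubectl equivalent would be roughly the following (resource name illustrative; requires a kubectl with --subresource support):

    kubectl patch awscluster 1725900000-e2e-test \
      --type merge --subresource status \
      -p '{"status":{"ready":true}}'
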
1 change: 0 additions & 1 deletion test/managedcluster/resources/aws-hosted-cp.yaml.tpl
@@ -7,7 +7,6 @@ spec:
config:
vpcID: ${AWS_VPC_ID}
region: ${AWS_REGION}
publicIP: ${AWS_PUBLIC_IP:=true}
subnets:
- id: ${AWS_SUBNET_ID}
availabilityZone: ${AWS_SUBNET_AVAILABILITY_ZONE}
23 changes: 0 additions & 23 deletions test/utils/utils.go
@@ -15,7 +15,6 @@
package utils

import (
"bytes"
"errors"
"fmt"
"os"
@@ -42,28 +41,6 @@ func Run(cmd *exec.Cmd) ([]byte, error) {
return output, nil
}

// Wait executes the provided command within this context and waits for it to
// finish. It then prints the output to the GinkgoWriter.
func Wait(cmd *exec.Cmd) error {
command := prepareCmd(cmd)
_, _ = fmt.Fprintf(GinkgoWriter, "waiting on: %s\n", command)

buf := new(bytes.Buffer)

if err := cmd.Start(); err != nil {
return fmt.Errorf("failed to start command: %w", err)
}

// And when you need to wait for the command to finish:
if err := cmd.Wait(); err != nil {
return handleCmdError(err, command)
}

_, _ = fmt.Fprintf(GinkgoWriter, "%s output: %s\n", command, buf.String())

return nil
}

func handleCmdError(err error, command string) error {
var exitError *exec.ExitError

Expand Down
