Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into add-logs-agent-integ-…
Browse files Browse the repository at this point in the history
…test
  • Loading branch information
varunch77 committed Dec 4, 2024
2 parents b1b10cd + 9af4477 commit 58e35bc
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 39 deletions.
24 changes: 22 additions & 2 deletions .github/workflows/PR-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,14 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ ubuntu-latest, windows-2019, windows-latest, macos-12]
os: [ubuntu-latest, windows-2019, windows-latest, macos-13]
include:
- os: ubuntu-latest
family: linux
cache-path: |
~/.cache/go-build
~/go/pkg/mod
- os: macos-12
- os: macos-13
family: darwin
cache-path: |
~/Library/Caches/go-build
Expand Down Expand Up @@ -138,3 +138,23 @@ jobs:
- name: Build
if: steps.cached_binaries.outputs.cache-hit != 'true' && needs.changes.outputs.build == 'true'
run: make amazon-cloudwatch-agent-${{ matrix.family }}

# Runs the Go race-detector test suite (`make test-data-race`) on an
# ubuntu-latest runner once the `lint` and `changes` jobs have finished.
test-data-race:
needs: [lint, changes]
name: Test data race
runs-on: ubuntu-latest
steps:
# Every step below carries the same `if:` gate on the `build` output of the
# `changes` job, so all of them are skipped when that output is not 'true'.
# NOTE(review): presumably gated per step rather than on the job so the job
# still reports a result when nothing needs building - confirm.
- name: Set up Go 1.x
if: needs.changes.outputs.build == 'true'
uses: actions/setup-go@v4
with:
# Tilde range: accepts 1.22.x patch releases at or above 1.22.2.
go-version: ~1.22.2
# Disable setup-go's built-in dependency caching for this job.
cache: false

- name: Check out code
if: needs.changes.outputs.build == 'true'
uses: actions/checkout@v3

- name: Test data race
if: needs.changes.outputs.build == 'true'
# Invokes the `test-data-race` make target.
run: make test-data-race
23 changes: 12 additions & 11 deletions .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,7 @@ jobs:
uses: actions/cache@v3
with:
path: go.mod
key: ${{ matrix.arrays.terraform_dir }}-${{ matrix.arrays.k8s_version }}-${{ matrix.arrays.instanceType }}-${{ github.sha }}-${{ matrix.arrays.os }}-${{ matrix.arrays.test_dir }}
key: ${{ matrix.arrays.terraform_dir }}-${{ matrix.arrays.k8sVersion }}-${{ matrix.arrays.instanceType }}-${{ github.sha }}-${{ matrix.arrays.os }}-${{ matrix.arrays.test_dir }}

- name: Login ECR
id: login-ecr
Expand Down Expand Up @@ -874,7 +874,7 @@ jobs:
-var="cwagent_image_tag=${{ github.sha }}" \
-var="ami_type=${{ matrix.arrays.ami }}" \
-var="instance_type=${{ matrix.arrays.instanceType }}" \
-var="k8s_version=${{ matrix.arrays.k8s_version }}"; then
-var="k8s_version=${{ matrix.arrays.k8sVersion }}"; then
terraform destroy -auto-approve
else
terraform destroy -auto-approve && exit 1
Expand Down Expand Up @@ -1254,7 +1254,7 @@ jobs:

GPUEndToEndTest:
name: "GPU E2E Test"
needs: [ StartLocalStack, GenerateTestMatrix, OutputEnvVariables ]
needs: [ GenerateTestMatrix, OutputEnvVariables ]
runs-on: ubuntu-latest
strategy:
fail-fast: false
Expand Down Expand Up @@ -1292,28 +1292,29 @@ jobs:
terraform init
if terraform apply --auto-approve \
-var="beta=true" \
-var="addon_name=amazon-cloudwatch-observability" \
-var="addon_version=v1.6.0-eksbuild.1" \
-var="k8s_version=1.29" ; then
-var="ami_type=${{ matrix.arrays.ami }}" \
-var="instance_type=${{ matrix.arrays.instanceType }}" \
-var="k8s_version=${{ matrix.arrays.k8sVersion }}"; then
echo "Terraform apply successful."
# Capture the output
echo "Getting EKS cluster name"
EKS_CLUSTER_NAME=$(terraform output -raw eks_cluster_name)
echo "Cluster name is ${EKS_CLUSTER_NAME}"
kubectl apply -f ./gpuBurner.yaml
kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.15.0/deployments/static/nvidia-device-plugin.yml
kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.0/deployments/static/nvidia-device-plugin.yml
kubectl patch amazoncloudwatchagents -n amazon-cloudwatch cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": ${{ secrets.AWS_ECR_PRIVATE_REGISTRY }}/${{ env.ECR_INTEGRATION_TEST_REPO }}:${{ github.sha }}}]'
kubectl rollout status daemonset nvidia-device-plugin-daemonset -n kube-system --timeout 10s
kubectl apply -f ./gpuBurner.yaml
else
terraform destroy -var="beta=${{ github.event.inputs.run_in_beta }}" -auto-approve && exit 1
fi
- name: Run Go tests with retry
uses: nick-fields/retry@v2
with:
max_attempts: 10
max_attempts: 5
timeout_minutes: 60
retry_wait_seconds: 60
retry_wait_seconds: 30
command: |
if [ "${{ matrix.arrays.terraform_dir }}" != "" ]; then
cd "${{ matrix.arrays.terraform_dir }}"
Expand Down Expand Up @@ -1344,4 +1345,4 @@ jobs:
else
cd terraform/eks/addon/gpu
fi
terraform destroy --auto-approve
terraform destroy -var="beta=${{ github.event.inputs.run_in_beta }}" -auto-approve
2 changes: 1 addition & 1 deletion .github/workflows/test-build-packages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ on:
jobs:
MakeMacPkg:
name: 'MakeMacPkg'
runs-on: macos-12
runs-on: macos-13
permissions:
id-token: write
contents: read
Expand Down
17 changes: 17 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,23 @@ lint: install-golangci-lint simple-lint
test:
CGO_ENABLED=0 go test -timeout 15m -coverprofile coverage.txt -failfast ./...

# Packages that currently contain known data races. They are excluded from
# the `test-data-race` target so it can pass on the rest of the code base.
# TODO: Fix each package and remove it from this list.
PKG_WITH_DATA_RACE := extension/entitystore
PKG_WITH_DATA_RACE += extension/server
PKG_WITH_DATA_RACE += internal/publisher
PKG_WITH_DATA_RACE += internal/retryer
PKG_WITH_DATA_RACE += internal/tls
PKG_WITH_DATA_RACE += plugins/inputs/logfile
PKG_WITH_DATA_RACE += plugins/inputs/logfile/tail
PKG_WITH_DATA_RACE += plugins/outputs/cloudwatch
PKG_WITH_DATA_RACE += plugins/outputs/cloudwatchlogs
PKG_WITH_DATA_RACE += plugins/processors/awsapplicationsignals
PKG_WITH_DATA_RACE += plugins/processors/ec2tagger
# Join the space-separated list into one `a|b|c` alternation for `grep -E`.
PKG_WITH_DATA_RACE_PATTERN := $(shell echo '$(PKG_WITH_DATA_RACE)' | tr ' ' '|')
# Run `go test -race` (the race detector needs cgo, hence CGO_ENABLED=1) over
# every package from `go list ./...` whose import path does not match the
# pattern above. The match is an unanchored substring match, so any package
# path that merely contains one of the entries (sub-packages, or siblings
# sharing the same prefix) is excluded as well.
test-data-race:
CGO_ENABLED=1 go test -timeout 15m -race -parallel 4 $(shell go list ./... | grep -v -E '$(PKG_WITH_DATA_RACE_PATTERN)')

clean::
rm -rf release/ build/
rm -f CWAGENT_VERSION
Expand Down
84 changes: 59 additions & 25 deletions tool/clean/clean_ami/clean_ami.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"fmt"
"log"
"sort"
"strings"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
Expand All @@ -24,6 +23,36 @@ import (
"github.com/aws/amazon-cloudwatch-agent/tool/clean"
)

// imagePrefixes lists the AMI name prefixes that cleanAMIs iterates over; each
// entry is turned into an EC2 "name" filter by appending a trailing "*".
// The prefixes were taken from checking the Image Builder Pipelines in us-west-2.
//
// The "cloudwatch-agent-integration-test-mac" entry is compared by exact value
// in cleanAMIs to route its images to the macOS-specific cleanup.
//
// NOTE(review): some entries are prefixes of others (for example "...-ubuntu"
// of "...-ubuntu-23"/"...-ubuntu-24"/"...-ubuntu-LTS-22", and "...-nvidia-gpu"
// of "...-nvidia-gpu-al2"), so with the wildcard appended their query results
// overlap and the same image can be visited more than once - confirm that this
// is intended.
var imagePrefixes = []string{
"cloudwatch-agent-integration-test-aarch64-al2023",
"cloudwatch-agent-integration-test-al2",
"cloudwatch-agent-integration-test-alma-linux-8",
"cloudwatch-agent-integration-test-alma-linux-9",
"cloudwatch-agent-integration-test-arm64-al2",
"cloudwatch-agent-integration-test-debian-11-arm64",
"cloudwatch-agent-integration-test-debian-12-arm64",
"cloudwatch-agent-integration-test-nvidia-gpu-al2",
"cloudwatch-agent-integration-test-ol7",
"cloudwatch-agent-integration-test-ol8",
"cloudwatch-agent-integration-test-ol9",
"cloudwatch-agent-integration-test-rocky-linux-8",
"cloudwatch-agent-integration-test-rocky-linux-9",
"cloudwatch-agent-integration-test-sles-15",
"cloudwatch-agent-integration-test-ubuntu-23",
"cloudwatch-agent-integration-test-ubuntu-24",
"cloudwatch-agent-integration-test-ubuntu",
"cloudwatch-agent-integration-test-ubuntu-LTS-22",
"cloudwatch-agent-integration-test-win-10",
"cloudwatch-agent-integration-test-win-11",
"cloudwatch-agent-integration-test-win-2016",
"cloudwatch-agent-integration-test-win-2019",
"cloudwatch-agent-integration-test-win-2022",
"cloudwatch-agent-integration-test-x86-al2023",
"cloudwatch-agent-integration-test-mac",
"cloudwatch-agent-integration-test-nvidia-gpu",
}

func main() {
err := cleanAMIs()
if err != nil {
Expand Down Expand Up @@ -137,38 +166,43 @@ func cleanAMIs() error {
}
ec2client := ec2.NewFromConfig(defaultConfig)

// Get list of ami
nameFilter := types.Filter{Name: aws.String("name"), Values: []string{
"cloudwatch-agent-integration-test*",
}}

//get instances to delete
describeImagesInput := ec2.DescribeImagesInput{Filters: []types.Filter{nameFilter}}
describeImagesOutput, err := ec2client.DescribeImages(ctx, &describeImagesInput)
if err != nil {
return err
}

var errList []error
// stores a list of AMIs per each macos version/architecture
macosImageAmiMap := make(map[string][]types.Image)

for _, image := range describeImagesOutput.Images {
if image.Name != nil && strings.HasPrefix(*image.Name, "cloudwatch-agent-integration-test-mac") {
// mac image - add it to the map and do nothing else for now
macosImageAmiMap[*image.Name] = append(macosImageAmiMap[*image.Name], image)
} else {
// non mac image - clean it if it's older than 60 days
cleanNonMacAMIs(ctx, ec2client, image, expirationDate, &errList)
// Cleanup for each AMI image type
var errList []error
for _, filter := range imagePrefixes {
nameFilter := types.Filter{Name: aws.String("name"), Values: []string{
fmt.Sprintf("%s*", filter),
}}

//get instances to delete
describeImagesInput := ec2.DescribeImagesInput{Filters: []types.Filter{nameFilter}}
describeImagesOutput, err := ec2client.DescribeImages(ctx, &describeImagesInput)
if err != nil {
log.Printf("Image filter %s returned an error, skipping :%v", filter, err.Error())
continue
}

log.Printf("%s: %d images found", filter, len(describeImagesOutput.Images))
if len(describeImagesOutput.Images) <= 1 {
log.Printf("1 or less image found for filter %s, skipping", filter)
continue
}

for _, image := range describeImagesOutput.Images {
if image.Name != nil && filter == "cloudwatch-agent-integration-test-mac" {
// mac image - add it to the map and do nothing else for now
macosImageAmiMap[*image.Name] = append(macosImageAmiMap[*image.Name], image)
} else {
// non mac image - clean it if it's older than 60 days
cleanNonMacAMIs(ctx, ec2client, image, expirationDate, &errList)
}
}
}

// handle the mac AMIs
cleanMacAMIs(ctx, ec2client, macosImageAmiMap, expirationDate, &errList)

if len(errList) != 0 {
return fmt.Errorf("%v", errList)
}

return nil
}

0 comments on commit 58e35bc

Please sign in to comment.