diff --git a/.github/workflows/PR-build.yml b/.github/workflows/PR-build.yml index 32b1315def..130ebff790 100644 --- a/.github/workflows/PR-build.yml +++ b/.github/workflows/PR-build.yml @@ -54,6 +54,8 @@ jobs: - name: Check out code if: needs.changes.outputs.lint == 'true' uses: actions/checkout@v3 + with: + fetch-depth: 0 - name: Check format if: needs.changes.outputs.lint == 'true' @@ -67,7 +69,7 @@ jobs: - name: Check license and imports if: needs.changes.outputs.lint == 'true' - run: make simple-lint + run: make lint build: needs: [lint, changes] @@ -76,14 +78,14 @@ jobs: strategy: fail-fast: false matrix: - os: [ ubuntu-latest, windows-2019, windows-latest, macos-12] + os: [ubuntu-latest, windows-2019, windows-latest, macos-13] include: - os: ubuntu-latest family: linux cache-path: | ~/.cache/go-build ~/go/pkg/mod - - os: macos-12 + - os: macos-13 family: darwin cache-path: | ~/Library/Caches/go-build @@ -136,3 +138,23 @@ jobs: - name: Build if: steps.cached_binaries.outputs.cache-hit != 'true' && needs.changes.outputs.build == 'true' run: make amazon-cloudwatch-agent-${{ matrix.family }} + + test-data-race: + needs: [lint, changes] + name: Test data race + runs-on: ubuntu-latest + steps: + - name: Set up Go 1.x + if: needs.changes.outputs.build == 'true' + uses: actions/setup-go@v4 + with: + go-version: ~1.22.2 + cache: false + + - name: Check out code + if: needs.changes.outputs.build == 'true' + uses: actions/checkout@v3 + + - name: Test data race + if: needs.changes.outputs.build == 'true' + run: make test-data-race diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 222e465bd4..18936b8eb5 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -563,7 +563,7 @@ jobs: uses: nick-fields/retry@v2 with: max_attempts: 3 - timeout_minutes: 30 + timeout_minutes: 60 retry_wait_seconds: 5 command: | if [ "${{ matrix.arrays.terraform_dir }}" != "" ]; then @@ -842,7 +842,7 @@ jobs: uses: actions/cache@v3 with: path: go.mod - key: ${{ matrix.arrays.terraform_dir }}-${{ matrix.arrays.k8s_version }}-${{ matrix.arrays.instanceType }}-${{ github.sha }}-${{ matrix.arrays.os }}-${{ matrix.arrays.test_dir }} + key: ${{ matrix.arrays.terraform_dir }}-${{ matrix.arrays.k8sVersion }}-${{ matrix.arrays.instanceType }}-${{ github.sha }}-${{ matrix.arrays.os }}-${{ matrix.arrays.test_dir }} - name: Login ECR id: login-ecr @@ -874,7 +874,7 @@ jobs: -var="cwagent_image_tag=${{ github.sha }}" \ -var="ami_type=${{ matrix.arrays.ami }}" \ -var="instance_type=${{ matrix.arrays.instanceType }}" \ - -var="k8s_version=${{ matrix.arrays.k8s_version }}"; then + -var="k8s_version=${{ matrix.arrays.k8sVersion }}"; then terraform destroy -auto-approve else terraform destroy -auto-approve && exit 1 @@ -1254,7 +1254,7 @@ jobs: GPUEndToEndTest: name: "GPU E2E Test" - needs: [ StartLocalStack, GenerateTestMatrix, OutputEnvVariables ] + needs: [ GenerateTestMatrix, OutputEnvVariables ] runs-on: ubuntu-latest strategy: fail-fast: false @@ -1292,28 +1292,30 @@ jobs: terraform init if terraform apply --auto-approve \ -var="beta=true" \ - -var="addon_name=amazon-cloudwatch-observability" \ - -var="addon_version=v1.6.0-eksbuild.1" \ - -var="k8s_version=1.29" ; then + -var="ami_type=${{ matrix.arrays.ami }}" \ + -var="instance_type=${{ matrix.arrays.instanceType }}" \ + -var="k8s_version=${{ matrix.arrays.k8sVersion }}"; then echo "Terraform apply successful." # Capture the output echo "Getting EKS cluster name" EKS_CLUSTER_NAME=$(terraform output -raw eks_cluster_name) echo "Cluster name is ${EKS_CLUSTER_NAME}" - kubectl apply -f ./gpuBurner.yaml - kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.15.0/deployments/static/nvidia-device-plugin.yml + kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.0/deployments/static/nvidia-device-plugin.yml kubectl patch amazoncloudwatchagents -n amazon-cloudwatch cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": ${{ secrets.AWS_ECR_PRIVATE_REGISTRY }}/${{ env.ECR_INTEGRATION_TEST_REPO }}:${{ github.sha }}}]' + # wait nvidia device plugin to be ready + sleep 10 + kubectl apply -f ./gpuBurner.yaml else - terraform destroy -var="beta=${{ github.event.inputs.run_in_beta }}" -auto-approve && exit 1 + terraform destroy -auto-approve && exit 1 fi - name: Run Go tests with retry uses: nick-fields/retry@v2 with: - max_attempts: 10 + max_attempts: 5 timeout_minutes: 60 - retry_wait_seconds: 60 + retry_wait_seconds: 30 command: | if [ "${{ matrix.arrays.terraform_dir }}" != "" ]; then cd "${{ matrix.arrays.terraform_dir }}" @@ -1344,4 +1346,4 @@ jobs: else cd terraform/eks/addon/gpu fi - terraform destroy --auto-approve + terraform destroy -auto-approve diff --git a/.github/workflows/test-build-packages.yml b/.github/workflows/test-build-packages.yml index c2e4e3c1b9..e44ddb6c7f 100644 --- a/.github/workflows/test-build-packages.yml +++ b/.github/workflows/test-build-packages.yml @@ -62,7 +62,7 @@ on: jobs: MakeMacPkg: name: 'MakeMacPkg' - runs-on: macos-12 + runs-on: macos-13 permissions: id-token: write contents: read diff --git a/Makefile b/Makefile index ee1073fa92..da9c8294a0 100644 --- a/Makefile +++ b/Makefile @@ -158,7 +158,7 @@ install-addlicense: install-golangci-lint: #Install from source for golangci-lint is not recommended based on https://golangci-lint.run/usage/install/#install-from-source so using binary #installation - curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(TOOLS_BIN_DIR) v1.50.1 + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(TOOLS_BIN_DIR) v1.62.2 fmt: install-goimports addlicense go fmt ./... @@ -201,6 +201,23 @@ lint: install-golangci-lint simple-lint test: CGO_ENABLED=0 go test -timeout 15m -coverprofile coverage.txt -failfast ./... +# List of existing packages with data races +# TODO: Fix each +PKG_WITH_DATA_RACE := extension/entitystore +PKG_WITH_DATA_RACE += extension/server +PKG_WITH_DATA_RACE += internal/publisher +PKG_WITH_DATA_RACE += internal/retryer +PKG_WITH_DATA_RACE += internal/tls +PKG_WITH_DATA_RACE += plugins/inputs/logfile +PKG_WITH_DATA_RACE += plugins/inputs/logfile/tail +PKG_WITH_DATA_RACE += plugins/outputs/cloudwatch +PKG_WITH_DATA_RACE += plugins/outputs/cloudwatchlogs +PKG_WITH_DATA_RACE += plugins/processors/awsapplicationsignals +PKG_WITH_DATA_RACE += plugins/processors/ec2tagger +PKG_WITH_DATA_RACE_PATTERN := $(shell echo '$(PKG_WITH_DATA_RACE)' | tr ' ' '|') +test-data-race: + CGO_ENABLED=1 go test -timeout 15m -race -parallel 4 $(shell go list ./... | grep -v -E '$(PKG_WITH_DATA_RACE_PATTERN)') + clean:: rm -rf release/ build/ rm -f CWAGENT_VERSION diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 51bd121123..0da54f86fe 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,3 +1,16 @@ +======================================================================== +Amazon CloudWatch Agent 1.300051.0 (2024-12-11) +======================================================================== +Bug Fixes: +* Fix Excessive IMDS related error logging +* Fixed a concurrency issue in entity attribute handling that was causing agent crashes due to simultaneous map writes. + +Enhancements: +* [AppSignal] Support RemoteEnvironment dimension for non-Kubernetes platforms. +* [Logs] Support exporting large exponential histograms as EMF logs +* [Logs] Reduce EMF exporter verbose logging +* [Traces] Generate URL section in X-Ray segment when net.peer.name attribute is available + ======================================================================== Amazon CloudWatch Agent 1.300050.0 (2024-11-18) ======================================================================== diff --git a/extension/agenthealth/handler/stats/handler_test.go b/extension/agenthealth/handler/stats/handler_test.go index 231981b758..30e603c02a 100644 --- a/extension/agenthealth/handler/stats/handler_test.go +++ b/extension/agenthealth/handler/stats/handler_test.go @@ -90,4 +90,4 @@ func TestNewHandlersWithoutStatusCodeAndAgenthStats(t *testing.T) { requestHandlers, responseHandlers := NewHandlers(zap.NewNop(), agent.StatsConfig{}, false, false) assert.Len(t, requestHandlers, 0) assert.Len(t, responseHandlers, 0) -} \ No newline at end of file +} diff --git a/extension/entitystore/ec2Info.go b/extension/entitystore/ec2Info.go index cfb2eccba8..e646fca78c 100644 --- a/extension/entitystore/ec2Info.go +++ b/extension/entitystore/ec2Info.go @@ -14,6 +14,7 @@ import ( "github.com/aws/amazon-cloudwatch-agent/internal/ec2metadataprovider" "github.com/aws/amazon-cloudwatch-agent/plugins/processors/ec2tagger" + "github.com/aws/amazon-cloudwatch-agent/translator/config" ) const ( @@ -32,7 +33,8 @@ type EC2Info struct { AutoScalingGroup string // region is used while making call to describeTags Ec2 API for AutoScalingGroup - Region string + Region string + kubernetesMode string metadataProvider ec2metadataprovider.MetadataProvider logger *zap.Logger @@ -48,8 +50,11 @@ func (ei *EC2Info) initEc2Info() { if err := ei.setInstanceIDAccountID(); err != nil { return } - if err := ei.setAutoScalingGroup(); err != nil { - return + // Instance metadata tags is not usable for EKS nodes + // https://github.com/kubernetes/cloud-provider-aws/issues/762 + if ei.kubernetesMode != config.ModeEKS { + limitedRetryer := NewRetryer(true, true, defaultJitterMin, defaultJitterMax, ec2tagger.BackoffSleepArray, maxRetry, ei.done, ei.logger) + limitedRetryer.refreshLoop(ei.retrieveAsgName) } ei.logger.Debug("Finished initializing EC2Info") } @@ -99,49 +104,16 @@ func (ei *EC2Info) setInstanceIDAccountID() error { } } -func (ei *EC2Info) setAutoScalingGroup() error { - retry := 0 - for { - var waitDuration time.Duration - if retry < len(ec2tagger.BackoffSleepArray) { - waitDuration = ec2tagger.BackoffSleepArray[retry] - } else { - waitDuration = ec2tagger.BackoffSleepArray[len(ec2tagger.BackoffSleepArray)-1] - } - - wait := time.NewTimer(waitDuration) - select { - case <-ei.done: - wait.Stop() - return errors.New("shutdown signal received") - case <-wait.C: - } - - if retry > 0 { - ei.logger.Debug("Initial retrieval of tags and volumes", zap.Int("retry", retry)) - } - - if err := ei.retrieveAsgName(); err != nil { - ei.logger.Debug("Unable to fetch instance tags with imds", zap.Int("retry", retry), zap.Error(err)) - } else { - ei.logger.Debug("Retrieval of auto-scaling group tags succeeded") - return nil - } - - retry++ - } - -} - func (ei *EC2Info) retrieveAsgName() error { tags, err := ei.metadataProvider.InstanceTags(context.Background()) if err != nil { - ei.logger.Debug("Failed to get tags through metadata provider", zap.Error(err)) + ei.logger.Debug("Failed to get AutoScalingGroup from instance tags. This is likely because instance tag is not enabled for IMDS but will not affect agent functionality.") return err } else if strings.Contains(tags, ec2tagger.Ec2InstanceTagKeyASG) { asg, err := ei.metadataProvider.InstanceTagValue(context.Background(), ec2tagger.Ec2InstanceTagKeyASG) if err != nil { ei.logger.Error("Failed to get AutoScalingGroup through metadata provider", zap.Error(err)) + return err } else { ei.logger.Debug("AutoScalingGroup retrieved through IMDS") ei.mutex.Lock() @@ -156,9 +128,10 @@ func (ei *EC2Info) retrieveAsgName() error { return nil } -func newEC2Info(metadataProvider ec2metadataprovider.MetadataProvider, done chan struct{}, region string, logger *zap.Logger) *EC2Info { +func newEC2Info(metadataProvider ec2metadataprovider.MetadataProvider, kubernetesMode string, done chan struct{}, region string, logger *zap.Logger) *EC2Info { return &EC2Info{ metadataProvider: metadataProvider, + kubernetesMode: kubernetesMode, done: done, Region: region, logger: logger, diff --git a/extension/entitystore/ec2Info_test.go b/extension/entitystore/ec2Info_test.go index 6602752c5a..9cc4efd896 100644 --- a/extension/entitystore/ec2Info_test.go +++ b/extension/entitystore/ec2Info_test.go @@ -15,6 +15,7 @@ import ( "go.uber.org/zap" "github.com/aws/amazon-cloudwatch-agent/internal/ec2metadataprovider" + "github.com/aws/amazon-cloudwatch-agent/translator/config" ) var mockedInstanceIdentityDoc = &ec2metadata.EC2InstanceIdentityDocument{ @@ -236,3 +237,35 @@ func TestNotInitIfMetadataProviderIsEmpty(t *testing.T) { }) } } + +func TestNoASGRetrievalInKubernetesMode(t *testing.T) { + type args struct { + metadataProvider ec2metadataprovider.MetadataProvider + kubernetesMode string + } + tests := []struct { + name string + args args + wantErr bool + want string + }{ + { + name: "EKSNoASGFromEC2Info", + args: args{ + metadataProvider: &mockMetadataProvider{InstanceIdentityDocument: mockedInstanceIdentityDoc, Tags: map[string]string{"aws:autoscaling:groupName": tagVal3}}, + kubernetesMode: config.ModeEKS, + }, + wantErr: false, + want: "", + }, + } + for _, tt := range tests { + logger, _ := zap.NewDevelopment() + t.Run(tt.name, func(t *testing.T) { + ei := &EC2Info{metadataProvider: tt.args.metadataProvider, kubernetesMode: tt.args.kubernetesMode, logger: logger} + go ei.initEc2Info() + time.Sleep(3 * time.Second) + assert.Equal(t, tt.want, ei.GetAutoScalingGroup()) + }) + } +} diff --git a/extension/entitystore/extension.go b/extension/entitystore/extension.go index a6af693cb3..a486134507 100644 --- a/extension/entitystore/extension.go +++ b/extension/entitystore/extension.go @@ -94,9 +94,13 @@ func (e *EntityStore) Start(ctx context.Context, host component.Host) error { e.serviceprovider = newServiceProvider(e.mode, e.config.Region, &e.ec2Info, e.metadataprovider, getEC2Provider, ec2CredentialConfig, e.done, e.logger) switch e.mode { case config.ModeEC2: - e.ec2Info = *newEC2Info(e.metadataprovider, e.done, e.config.Region, e.logger) + e.ec2Info = *newEC2Info(e.metadataprovider, e.kubernetesMode, e.done, e.config.Region, e.logger) go e.ec2Info.initEc2Info() - go e.serviceprovider.startServiceProvider() + // Instance metadata tags is not usable for EKS nodes + // https://github.com/kubernetes/cloud-provider-aws/issues/762 + if e.kubernetesMode == "" { + go e.serviceprovider.startServiceProvider() + } } if e.kubernetesMode != "" { e.eksInfo = newEKSInfo(e.logger) diff --git a/extension/entitystore/extension_test.go b/extension/entitystore/extension_test.go index 5662cf1d88..02cdff56d3 100644 --- a/extension/entitystore/extension_test.go +++ b/extension/entitystore/extension_test.go @@ -626,6 +626,58 @@ func TestEntityStore_LogMessageDoesNotIncludeResourceInfo(t *testing.T) { } } +func TestEntityStore_ServiceProviderInDifferentEnv(t *testing.T) { + type args struct { + mode string + kubernetesMode string + } + tests := []struct { + name string + args args + }{ + { + name: "EC2inEKS", + args: args{ + mode: config.ModeEC2, + kubernetesMode: config.ModeEKS, + }, + }, + { + name: "EC2Only", + args: args{ + mode: config.ModeEC2, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + esConfig := &Config{ + Mode: tt.args.mode, + KubernetesMode: tt.args.kubernetesMode, + } + getMetaDataProvider = mockMetadataProviderFunc + e := EntityStore{ + logger: zap.NewNop(), + config: esConfig, + } + e.Start(context.TODO(), nil) + time.Sleep(3 * time.Second) + + name, source := e.serviceprovider.getServiceNameAndSource() + if tt.args.mode == config.ModeEC2 && tt.args.kubernetesMode != "" { + assert.Equal(t, name, ServiceNameUnknown) + assert.Equal(t, source, ServiceNameSourceUnknown) + } else if tt.args.mode == config.ModeEC2 && tt.args.kubernetesMode == "" { + assert.Equal(t, name, "TestRole") + assert.Equal(t, source, ServiceNameSourceClientIamRole) + } + + }) + } + +} + func assertIfNonEmpty(t *testing.T, message string, pattern string) { if pattern != "" { assert.NotContains(t, message, pattern) diff --git a/extension/entitystore/retryer.go b/extension/entitystore/retryer.go index cefa06d374..65829f8970 100644 --- a/extension/entitystore/retryer.go +++ b/extension/entitystore/retryer.go @@ -77,7 +77,7 @@ func (r *Retryer) refreshLoop(updateFunc func() error) int { if err != nil { retry++ - r.logger.Debug("there was an error when retrieving service attribute.", zap.Error(err)) + r.logger.Debug("there was an issue when retrieving entity attributes but will not affect agent functionality", zap.Error(err)) } else { retry = 1 } diff --git a/extension/entitystore/serviceprovider.go b/extension/entitystore/serviceprovider.go index c65a0daf62..9f36dd9005 100644 --- a/extension/entitystore/serviceprovider.go +++ b/extension/entitystore/serviceprovider.go @@ -240,7 +240,7 @@ func (s *serviceprovider) scrapeIAMRole() error { func (s *serviceprovider) scrapeImdsServiceName() error { tags, err := s.metadataProvider.InstanceTags(context.Background()) if err != nil { - s.logger.Debug("Failed to get tags through metadata provider", zap.Error(err)) + s.logger.Debug("Failed to get service name from instance tags. This is likely because instance tag is not enabled for IMDS but will not affect agent functionality.") return err } // This will check whether the tags contains SERVICE, APPLICATION, APP, in that order. diff --git a/internal/retryer/imdsretryer.go b/internal/retryer/imdsretryer.go index 29dec2976f..5a4322c479 100644 --- a/internal/retryer/imdsretryer.go +++ b/internal/retryer/imdsretryer.go @@ -43,7 +43,6 @@ func (r IMDSRetryer) ShouldRetry(req *request.Request) bool { if awsError, ok := req.Error.(awserr.Error); r.DefaultRetryer.ShouldRetry(req) || (ok && awsError != nil && awsError.Code() == "EC2MetadataError") { shouldRetry = true } - fmt.Printf("D! should retry %t for imds error : %v", shouldRetry, req.Error) return shouldRetry } diff --git a/tool/clean/clean_ami/clean_ami.go b/tool/clean/clean_ami/clean_ami.go index edfdc9c7b0..fb254ba1d6 100644 --- a/tool/clean/clean_ami/clean_ami.go +++ b/tool/clean/clean_ami/clean_ami.go @@ -12,7 +12,6 @@ import ( "fmt" "log" "sort" - "strings" "time" "github.com/aws/aws-sdk-go-v2/aws" @@ -24,6 +23,36 @@ import ( "github.com/aws/amazon-cloudwatch-agent/tool/clean" ) +// Image Prefixes are taken from checking the Image Builder Pipelines in us-west-2 +var imagePrefixes = []string{ + "cloudwatch-agent-integration-test-aarch64-al2023", + "cloudwatch-agent-integration-test-al2", + "cloudwatch-agent-integration-test-alma-linux-8", + "cloudwatch-agent-integration-test-alma-linux-9", + "cloudwatch-agent-integration-test-arm64-al2", + "cloudwatch-agent-integration-test-debian-11-arm64", + "cloudwatch-agent-integration-test-debian-12-arm64", + "cloudwatch-agent-integration-test-nvidia-gpu-al2", + "cloudwatch-agent-integration-test-ol7", + "cloudwatch-agent-integration-test-ol8", + "cloudwatch-agent-integration-test-ol9", + "cloudwatch-agent-integration-test-rocky-linux-8", + "cloudwatch-agent-integration-test-rocky-linux-9", + "cloudwatch-agent-integration-test-sles-15", + "cloudwatch-agent-integration-test-ubuntu-23", + "cloudwatch-agent-integration-test-ubuntu-24", + "cloudwatch-agent-integration-test-ubuntu", + "cloudwatch-agent-integration-test-ubuntu-LTS-22", + "cloudwatch-agent-integration-test-win-10", + "cloudwatch-agent-integration-test-win-11", + "cloudwatch-agent-integration-test-win-2016", + "cloudwatch-agent-integration-test-win-2019", + "cloudwatch-agent-integration-test-win-2022", + "cloudwatch-agent-integration-test-x86-al2023", + "cloudwatch-agent-integration-test-mac", + "cloudwatch-agent-integration-test-nvidia-gpu", +} + func main() { err := cleanAMIs() if err != nil { @@ -137,38 +166,43 @@ func cleanAMIs() error { } ec2client := ec2.NewFromConfig(defaultConfig) - // Get list of ami - nameFilter := types.Filter{Name: aws.String("name"), Values: []string{ - "cloudwatch-agent-integration-test*", - }} - - //get instances to delete - describeImagesInput := ec2.DescribeImagesInput{Filters: []types.Filter{nameFilter}} - describeImagesOutput, err := ec2client.DescribeImages(ctx, &describeImagesInput) - if err != nil { - return err - } - - var errList []error // stores a list of AMIs per each macos version/architecture macosImageAmiMap := make(map[string][]types.Image) - for _, image := range describeImagesOutput.Images { - if image.Name != nil && strings.HasPrefix(*image.Name, "cloudwatch-agent-integration-test-mac") { - // mac image - add it to the map and do nothing else for now - macosImageAmiMap[*image.Name] = append(macosImageAmiMap[*image.Name], image) - } else { - // non mac image - clean it if it's older than 60 days - cleanNonMacAMIs(ctx, ec2client, image, expirationDate, &errList) + // Cleanup for each AMI image type + var errList []error + for _, filter := range imagePrefixes { + nameFilter := types.Filter{Name: aws.String("name"), Values: []string{ + fmt.Sprintf("%s*", filter), + }} + + //get instances to delete + describeImagesInput := ec2.DescribeImagesInput{Filters: []types.Filter{nameFilter}} + describeImagesOutput, err := ec2client.DescribeImages(ctx, &describeImagesInput) + if err != nil { + log.Printf("Image filter %s returned an error, skipping :%v", filter, err.Error()) + continue + } + + log.Printf("%s: %d images found", filter, len(describeImagesOutput.Images)) + if len(describeImagesOutput.Images) <= 1 { + log.Printf("1 or less image found for filter %s, skipping", filter) + continue + } + + for _, image := range describeImagesOutput.Images { + if image.Name != nil && filter == "cloudwatch-agent-integration-test-mac" { + // mac image - add it to the map and do nothing else for now + macosImageAmiMap[*image.Name] = append(macosImageAmiMap[*image.Name], image) + } else { + // non mac image - clean it if it's older than 60 days + cleanNonMacAMIs(ctx, ec2client, image, expirationDate, &errList) + } } } // handle the mac AMIs cleanMacAMIs(ctx, ec2client, macosImageAmiMap, expirationDate, &errList) - if len(errList) != 0 { - return fmt.Errorf("%v", errList) - } - return nil } diff --git a/translator/tocwconfig/sampleConfig/appsignals_and_ecs_config.yaml b/translator/tocwconfig/sampleConfig/appsignals_and_ecs_config.yaml index 760f37540f..7be5a4b186 100644 --- a/translator/tocwconfig/sampleConfig/appsignals_and_ecs_config.yaml +++ b/translator/tocwconfig/sampleConfig/appsignals_and_ecs_config.yaml @@ -30,40 +30,70 @@ exporters: - Fault - Error - dimensions: - - - Environment - - Operation - - RemoteOperation - - RemoteResourceIdentifier - - RemoteResourceType - - RemoteService - - Service - - - Environment - - Operation - - RemoteOperation - - RemoteService - - Service - - - Environment - - RemoteService - - Service - - - Environment - - RemoteOperation - - RemoteResourceIdentifier - - RemoteResourceType - - RemoteService - - Service - - - Environment - - RemoteOperation - - RemoteService - - Service - - - Environment - - RemoteResourceIdentifier - - RemoteResourceType - - RemoteService - - Service - - - RemoteResourceIdentifier - - RemoteResourceType - - RemoteService - - - RemoteService + - - Environment + - Operation + - RemoteEnvironment + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - Operation + - RemoteEnvironment + - RemoteOperation + - RemoteService + - Service + - - Environment + - Operation + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - Operation + - RemoteOperation + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteService + - Service + - - Environment + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteOperation + - RemoteService + - Service + - - Environment + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - RemoteOperation + - RemoteService + - Service + - - Environment + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - - RemoteService label_matchers: - label_names: - Telemetry.Source diff --git a/translator/tocwconfig/sampleConfig/base_appsignals_config.yaml b/translator/tocwconfig/sampleConfig/base_appsignals_config.yaml index e7416943af..2f8b79e011 100644 --- a/translator/tocwconfig/sampleConfig/base_appsignals_config.yaml +++ b/translator/tocwconfig/sampleConfig/base_appsignals_config.yaml @@ -32,6 +32,7 @@ exporters: - dimensions: - - Environment - Operation + - RemoteEnvironment - RemoteOperation - RemoteResourceIdentifier - RemoteResourceType @@ -39,10 +40,39 @@ exporters: - Service - - Environment - Operation + - RemoteEnvironment - RemoteOperation - RemoteService - Service - - Environment + - Operation + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - Operation + - RemoteOperation + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteService + - Service + - - Environment + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteOperation - RemoteService - Service - - Environment diff --git a/translator/tocwconfig/sampleConfig/base_appsignals_fallback_config.yaml b/translator/tocwconfig/sampleConfig/base_appsignals_fallback_config.yaml index 1b0a6da034..e410137aaa 100644 --- a/translator/tocwconfig/sampleConfig/base_appsignals_fallback_config.yaml +++ b/translator/tocwconfig/sampleConfig/base_appsignals_fallback_config.yaml @@ -1,135 +1,164 @@ exporters: - awsemf/application_signals: - certificate_file_path: "" - detailed_metrics: false - dimension_rollup_option: NoDimensionRollup - disable_metric_extraction: false - eks_fargate_container_insights_enabled: false - endpoint: https://fake_endpoint - enhanced_container_insights: false - imds_retries: 1 - local_mode: true - log_group_name: /aws/application-signals/data - log_retention: 0 - log_stream_name: "" - max_retries: 2 - metric_declarations: - - dimensions: - - - Environment - - Operation - - Service - - - Environment - - Service - label_matchers: - - label_names: - - Telemetry.Source - regex: ^(ServerSpan|LocalRootSpan)$ - separator: ; - metric_name_selectors: - - Latency - - Fault - - Error - - dimensions: - - - Environment - - Operation - - RemoteOperation - - RemoteResourceIdentifier - - RemoteResourceType - - RemoteService - - Service - - - Environment - - Operation - - RemoteOperation - - RemoteService - - Service - - - Environment - - RemoteService - - Service - - - Environment - - RemoteOperation - - RemoteResourceIdentifier - - RemoteResourceType - - RemoteService - - Service - - - Environment - - RemoteOperation - - RemoteService - - Service - - - Environment - - RemoteResourceIdentifier - - RemoteResourceType - - RemoteService - - Service - - - RemoteResourceIdentifier - - RemoteResourceType - - RemoteService - - - RemoteService - label_matchers: - - label_names: - - Telemetry.Source - regex: ^(ClientSpan|ProducerSpan|ConsumerSpan)$ - separator: ; - metric_name_selectors: - - Latency - - Fault - - Error - - dimensions: - - - Environment - - Service - label_matchers: - - label_names: - - Telemetry.Source - regex: ^RuntimeMetric$ - separator: ; - metric_name_selectors: - - ^.*$ - middleware: agenthealth/logs - namespace: ApplicationSignals - no_verify_ssl: false - num_workers: 8 - output_destination: cloudwatch - profile: AmazonCloudWatchAgent - proxy_address: "" - region: us-east-1 - request_timeout_seconds: 30 - resource_arn: "" - resource_to_telemetry_conversion: - enabled: false - retain_initial_value_of_delta_metric: false - role_arn: "" - shared_credentials_file: - - fake-path - version: "1" - awsxray/application_signals: - certificate_file_path: "" - endpoint: https://fake_endpoint - imds_retries: 1 - index_all_attributes: false - indexed_attributes: - - aws.local.service - - aws.local.operation - - aws.local.environment - - aws.remote.service - - aws.remote.operation - - aws.remote.environment - - aws.remote.resource.identifier - - aws.remote.resource.type - local_mode: true - max_retries: 2 - middleware: agenthealth/traces - no_verify_ssl: false - num_workers: 8 - profile: AmazonCloudWatchAgent - proxy_address: "" - region: us-east-1 - request_timeout_seconds: 30 - resource_arn: "" - role_arn: "" - shared_credentials_file: - - fake-path - telemetry: - enabled: true - include_metadata: true + awsemf/application_signals: + certificate_file_path: "" + detailed_metrics: false + dimension_rollup_option: NoDimensionRollup + disable_metric_extraction: false + eks_fargate_container_insights_enabled: false + endpoint: https://fake_endpoint + enhanced_container_insights: false + imds_retries: 1 + local_mode: true + log_group_name: /aws/application-signals/data + log_retention: 0 + log_stream_name: "" + max_retries: 2 + metric_declarations: + - dimensions: + - - Environment + - Operation + - Service + - - Environment + - Service + label_matchers: + - label_names: + - Telemetry.Source + regex: ^(ServerSpan|LocalRootSpan)$ + separator: ; + metric_name_selectors: + - Latency + - Fault + - Error + - dimensions: + - - Environment + - Operation + - RemoteEnvironment + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - Operation + - RemoteEnvironment + - RemoteOperation + - RemoteService + - Service + - - Environment + - Operation + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - Operation + - RemoteOperation + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteService + - Service + - - Environment + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteOperation + - RemoteService + - Service + - - Environment + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - RemoteOperation + - RemoteService + - Service + - - Environment + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - - RemoteService + label_matchers: + - label_names: + - Telemetry.Source + regex: ^(ClientSpan|ProducerSpan|ConsumerSpan)$ + separator: ; + metric_name_selectors: + - Latency + - Fault + - Error + - dimensions: + - [ Environment, Service ] + label_matchers: + - label_names: + - Telemetry.Source + regex: '^RuntimeMetric$' + separator: ; + metric_name_selectors: + - '^.*$' + middleware: agenthealth/logs + namespace: ApplicationSignals + no_verify_ssl: false + num_workers: 8 + output_destination: cloudwatch + profile: AmazonCloudWatchAgent + proxy_address: "" + region: us-east-1 + request_timeout_seconds: 30 + resource_arn: "" + resource_to_telemetry_conversion: + enabled: false + retain_initial_value_of_delta_metric: false + role_arn: "" + shared_credentials_file: + - fake-path + version: "1" + awsxray/application_signals: + certificate_file_path: "" + endpoint: https://fake_endpoint + imds_retries: 1 + index_all_attributes: false + indexed_attributes: + - aws.local.service + - aws.local.operation + - aws.local.environment + - aws.remote.service + - aws.remote.operation + - aws.remote.environment + - aws.remote.resource.identifier + - aws.remote.resource.type + local_mode: true + max_retries: 2 + middleware: agenthealth/traces + no_verify_ssl: false + num_workers: 8 + profile: AmazonCloudWatchAgent + proxy_address: "" + region: us-east-1 + request_timeout_seconds: 30 + resource_arn: "" + role_arn: "" + shared_credentials_file: + - fake-path + telemetry: + enabled: true + include_metadata: true extensions: agenthealth/logs: is_usage_data_enabled: true diff --git a/translator/translate/otel/exporter/awsemf/appsignals_config_generic.yaml b/translator/translate/otel/exporter/awsemf/appsignals_config_generic.yaml deleted file mode 100644 index 57d10c5c6b..0000000000 --- a/translator/translate/otel/exporter/awsemf/appsignals_config_generic.yaml +++ /dev/null @@ -1,41 +0,0 @@ -log_group_name: "/aws/application-signals/data" -namespace: "ApplicationSignals" -middleware: agenthealth/logs -dimension_rollup_option: "NoDimensionRollup" -metric_declarations: - - dimensions: - - [Environment, Service, Operation] - - [Environment, Service] - label_matchers: - - label_names: - - Telemetry.Source - regex: '^(ServerSpan|LocalRootSpan)$' - metric_name_selectors: - - Latency - - Fault - - Error - - dimensions: - - [Environment, Service, Operation, RemoteService, RemoteOperation, RemoteResourceIdentifier, RemoteResourceType] - - [Environment, Service, Operation, RemoteService, RemoteOperation] - - [Environment, Service, RemoteService] - - [Environment, Service, RemoteService, RemoteOperation, RemoteResourceIdentifier, RemoteResourceType] - - [Environment, Service, RemoteService, RemoteOperation] - - [Environment, Service, RemoteService, RemoteResourceIdentifier, RemoteResourceType] - - [RemoteService, RemoteResourceIdentifier, RemoteResourceType] - - [RemoteService] - label_matchers: - - label_names: - - Telemetry.Source - regex: '^(ClientSpan|ProducerSpan|ConsumerSpan)$' - metric_name_selectors: - - Latency - - Fault - - Error - - dimensions: - - [Environment, Service] - label_matchers: - - label_names: - - Telemetry.Source - regex: '^RuntimeMetric$' - metric_name_selectors: - - '^.*$' \ No newline at end of file diff --git a/translator/translate/otel/exporter/awsemf/appsignals_config_k8s.yaml b/translator/translate/otel/exporter/awsemf/appsignals_config_k8s.yaml deleted file mode 100644 index 05ea848fff..0000000000 --- a/translator/translate/otel/exporter/awsemf/appsignals_config_k8s.yaml +++ /dev/null @@ -1,46 +0,0 @@ -log_group_name: "/aws/application-signals/data" -namespace: "ApplicationSignals" -middleware: agenthealth/logs -dimension_rollup_option: "NoDimensionRollup" -metric_declarations: - - dimensions: - - [Environment, Service, Operation] - - [Environment, Service] - label_matchers: - - label_names: - - Telemetry.Source - regex: ^(ServerSpan|LocalRootSpan)$ - metric_name_selectors: - - Latency - - Fault - - Error - - dimensions: - - [Environment, Service, Operation, RemoteService, RemoteOperation, RemoteEnvironment, RemoteResourceIdentifier, RemoteResourceType] - - [Environment, Service, Operation, RemoteService, RemoteOperation, RemoteEnvironment] - - [Environment, Service, Operation, RemoteService, RemoteOperation, RemoteResourceIdentifier, RemoteResourceType] - - [Environment, Service, Operation, RemoteService, RemoteOperation] - - [Environment, Service, RemoteService, RemoteEnvironment] - - [Environment, Service, RemoteService] - - [Environment, Service, RemoteService, RemoteOperation, RemoteEnvironment, RemoteResourceIdentifier, RemoteResourceType] - - [Environment, Service, RemoteService, RemoteOperation, RemoteEnvironment] - - [Environment, Service, RemoteService, RemoteOperation, RemoteResourceIdentifier, RemoteResourceType] - - [Environment, Service, RemoteService, RemoteOperation] - - [Environment, Service, RemoteService, RemoteResourceIdentifier, RemoteResourceType] - - [RemoteService, RemoteResourceIdentifier, RemoteResourceType] - - [RemoteService] - label_matchers: - - label_names: - - Telemetry.Source - regex: '^(ClientSpan|ProducerSpan|ConsumerSpan)$' - metric_name_selectors: - - Latency - - Fault - - Error - - dimensions: - - [Environment, Service] - label_matchers: - - label_names: - - Telemetry.Source - regex: '^RuntimeMetric$' - metric_name_selectors: - - '^.*$' \ No newline at end of file diff --git a/translator/translate/otel/exporter/awsemf/appsignals_config_eks.yaml b/translator/translate/otel/exporter/awsemf/awsemf_default_appsignals.yaml similarity index 100% rename from translator/translate/otel/exporter/awsemf/appsignals_config_eks.yaml rename to translator/translate/otel/exporter/awsemf/awsemf_default_appsignals.yaml diff --git a/translator/translate/otel/exporter/awsemf/translator.go b/translator/translate/otel/exporter/awsemf/translator.go index 18ed408bed..24ef1dcd82 100644 --- a/translator/translate/otel/exporter/awsemf/translator.go +++ b/translator/translate/otel/exporter/awsemf/translator.go @@ -22,7 +22,6 @@ import ( "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/common" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/extension/agenthealth" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/receiver/awscontainerinsight" - "github.com/aws/amazon-cloudwatch-agent/translator/util/ecsutil" ) const ( @@ -44,13 +43,7 @@ var defaultKubernetesKueueConfig string //go:embed awsemf_default_prometheus.yaml var defaultPrometheusConfig string -//go:embed appsignals_config_eks.yaml -var appSignalsConfigEks string - -//go:embed appsignals_config_k8s.yaml -var appSignalsConfigK8s string - -//go:embed appsignals_config_generic.yaml +//go:embed awsemf_default_appsignals.yaml var appSignalsConfigGeneric string //go:embed awsemf_jmx_config.yaml @@ -92,7 +85,7 @@ func (t *translator) Translate(c *confmap.Conf) (component.Config, error) { defaultConfig := defaultGenericConfig if t.isAppSignals(c) { - defaultConfig = getAppSignalsConfig() + defaultConfig = appSignalsConfigGeneric } else if t.isCiJMX(c) { defaultConfig = defaultJmxConfig } else if isEcs(c) { @@ -163,31 +156,6 @@ func (t *translator) Translate(c *confmap.Conf) (component.Config, error) { return cfg, nil } -func getAppSignalsConfig() string { - ctx := context.CurrentContext() - - mode := ctx.KubernetesMode() - if mode == "" { - mode = ctx.Mode() - } - if mode == config.ModeEC2 { - if ecsutil.GetECSUtilSingleton().IsECS() { - mode = config.ModeECS - } - } - - switch mode { - case config.ModeEKS: - return appSignalsConfigEks - case config.ModeK8sEC2, config.ModeK8sOnPrem: - return appSignalsConfigK8s - case config.ModeEC2, config.ModeECS: - return appSignalsConfigGeneric - default: - return appSignalsConfigGeneric - } -} - func (t *translator) isAppSignals(conf *confmap.Conf) bool { return (t.name == common.AppSignals || t.name == common.AppSignalsFallback) && (conf.IsSet(common.AppSignalsMetrics) || conf.IsSet(common.AppSignalsTraces) || conf.IsSet(common.AppSignalsMetricsFallback) || conf.IsSet(common.AppSignalsTracesFallback)) } diff --git a/translator/translate/otel/exporter/awsemf/translator_test.go b/translator/translate/otel/exporter/awsemf/translator_test.go index b0c779198a..5d9e6c101c 100644 --- a/translator/translate/otel/exporter/awsemf/translator_test.go +++ b/translator/translate/otel/exporter/awsemf/translator_test.go @@ -928,7 +928,7 @@ func TestTranslateAppSignals(t *testing.T) { "application_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_eks.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "false", "region": "us-east-1", "role_arn": "global_arn", @@ -944,7 +944,7 @@ func TestTranslateAppSignals(t *testing.T) { "application_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_k8s.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "true", "region": "us-east-1", "role_arn": "global_arn", @@ -960,7 +960,7 @@ func TestTranslateAppSignals(t *testing.T) { "application_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_generic.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "true", "region": "us-east-1", "role_arn": "global_arn", @@ -976,7 +976,7 @@ func TestTranslateAppSignals(t *testing.T) { "application_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_generic.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "false", "region": "us-east-1", "role_arn": "global_arn", @@ -992,7 +992,7 @@ func TestTranslateAppSignals(t *testing.T) { "app_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_eks.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "false", "region": "us-east-1", "role_arn": "global_arn", @@ -1008,7 +1008,7 @@ func TestTranslateAppSignals(t *testing.T) { "app_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_k8s.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "true", "region": "us-east-1", "role_arn": "global_arn", @@ -1024,7 +1024,7 @@ func TestTranslateAppSignals(t *testing.T) { "app_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_generic.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "true", "region": "us-east-1", "role_arn": "global_arn", @@ -1040,7 +1040,7 @@ func TestTranslateAppSignals(t *testing.T) { "app_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_generic.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "false", "region": "us-east-1", "role_arn": "global_arn", diff --git a/translator/translate/otel/processor/ec2taggerprocessor/translator.go b/translator/translate/otel/processor/ec2taggerprocessor/translator.go index 2add6ecf55..06a5e1cf02 100644 --- a/translator/translate/otel/processor/ec2taggerprocessor/translator.go +++ b/translator/translate/otel/processor/ec2taggerprocessor/translator.go @@ -14,7 +14,6 @@ import ( "github.com/aws/amazon-cloudwatch-agent/plugins/processors/ec2tagger" "github.com/aws/amazon-cloudwatch-agent/translator/translate/agent" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/common" - "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/extension/agenthealth" ) var Ec2taggerKey = common.ConfigKey(common.MetricsKey, common.AppendDimensionsKey) @@ -64,7 +63,7 @@ func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { cfg.DiskDeviceTagKey = "device" } - cfg.MiddlewareID = &agenthealth.StatusCodeID + //cfg.MiddlewareID = &agenthealth.StatusCodeID cfg.RefreshIntervalSeconds = time.Duration(0) cfg.IMDSRetries = retryer.GetDefaultRetryNumber()