diff --git a/.github/workflows/PR-build.yml b/.github/workflows/PR-build.yml index 969a495f89..74029306ef 100644 --- a/.github/workflows/PR-build.yml +++ b/.github/workflows/PR-build.yml @@ -76,14 +76,14 @@ jobs: strategy: fail-fast: false matrix: - os: [ ubuntu-latest, windows-2019, windows-latest, macos-11] + os: [ ubuntu-latest, windows-2019, windows-latest, macos-12] include: - os: ubuntu-latest family: linux cache-path: | ~/.cache/go-build ~/go/pkg/mod - - os: macos-11 + - os: macos-12 family: darwin cache-path: | ~/Library/Caches/go-build diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 4c806cba83..62e03ebf78 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -125,6 +125,8 @@ jobs: eks_deployment_matrix: ${{ steps.set-matrix.outputs.eks_deployment_matrix }} ec2_linux_itar_matrix: ${{ steps.set-matrix.outputs.ec2_linux_itar_matrix }} ec2_linux_china_matrix: ${{ steps.set-matrix.outputs.ec2_linux_china_matrix }} + eks_addon_matrix: ${{ steps.set-matrix.outputs.eks_addon_matrix }} + steps: - uses: actions/checkout@v3 @@ -142,6 +144,7 @@ jobs: run: | go run --tags=generator generator/test_case_generator.go echo "::set-output name=ec2_gpu_matrix::$(echo $(cat generator/resources/ec2_gpu_complete_test_matrix.json))" + echo "::set-output name=eks_addon_matrix::$(echo $(cat generator/resources/eks_addon_complete_test_matrix.json))" echo "::set-output name=ec2_linux_matrix::$(echo $(cat generator/resources/ec2_linux_complete_test_matrix.json))" echo "::set-output name=ec2_windows_matrix::$(echo $(cat generator/resources/ec2_windows_complete_test_matrix.json))" echo "::set-output name=ec2_mac_matrix::$(echo $(cat generator/resources/ec2_mac_complete_test_matrix.json))" @@ -159,6 +162,7 @@ jobs: - name: Echo test plan matrix run: | echo "ec2_gpu_matrix: ${{ steps.set-matrix.outputs.ec2_gpu_matrix }}" + echo "eks_addon_matrix: ${{ steps.set-matrix.outputs.eks_addon_matrix 
}}" echo "ec2_linux_matrix: ${{ steps.set-matrix.outputs.ec2_linux_matrix }}" echo "ec2_windows_matrix: ${{ steps.set-matrix.outputs.ec2_windows_matrix }}" echo "ec2_mac_matrix: ${{ steps.set-matrix.outputs.ec2_mac_matrix }}" @@ -1354,4 +1358,99 @@ jobs: permissions: id-token: write contents: read - secrets: inherit \ No newline at end of file + secrets: inherit + + + GPUEndToEndTest: + name: "GPU E2E Test" + needs: [ BuildAndUpload, StartLocalStack, GenerateTestMatrix, OutputEnvVariables ] + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + arrays: ${{ fromJson(needs.GenerateTestMatrix.outputs.eks_addon_matrix) }} + permissions: + id-token: write + contents: read + steps: + - uses: actions/checkout@v3 + with: + repository: ${{env.CWA_GITHUB_TEST_REPO_NAME}} + ref: ${{env.CWA_GITHUB_TEST_REPO_BRANCH}} + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: ${{ env.TERRAFORM_AWS_ASSUME_ROLE }} + aws-region: us-west-2 + role-duration-seconds: ${{ env.TERRAFORM_AWS_ASSUME_ROLE_DURATION }} + + + - name: Verify Terraform version + run: terraform --version + + + - name: Terraform apply and setup + run: | + if [ "${{ matrix.arrays.terraform_dir }}" != "" ]; then + cd "${{ matrix.arrays.terraform_dir }}" + else + cd terraform/eks/addon/gpu + fi + + terraform init + if terraform apply --auto-approve \ + -var="beta=true" \ + -var="addon_name=amazon-cloudwatch-observability" \ + -var="addon_version=v1.6.0-eksbuild.1" \ + -var="k8s_version=1.29" ; then + echo "Terraform apply successful." 
+ + # Capture the output + echo "Getting EKS cluster name" + EKS_CLUSTER_NAME=$(terraform output -raw eks_cluster_name) + echo "Cluster name is ${EKS_CLUSTER_NAME}" + kubectl apply -f ./gpuBurner.yaml + kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.15.0/deployments/static/nvidia-device-plugin.yml + kubectl patch amazoncloudwatchagents -n amazon-cloudwatch cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": "${{ secrets.AWS_ECR_PRIVATE_REGISTRY }}/${{ env.ECR_INTEGRATION_TEST_REPO }}:${{ github.sha }}"}]' # image reference must be a quoted JSON string or the patch is rejected + else + terraform destroy -var="beta=true" -auto-approve && exit 1 # same beta value as the apply above; still fail the step + fi + + - name: Run Go tests with retry + uses: nick-fields/retry@v2 + with: + max_attempts: 10 + timeout_minutes: 60 + retry_wait_seconds: 60 + command: | + if [ "${{ matrix.arrays.terraform_dir }}" != "" ]; then + cd "${{ matrix.arrays.terraform_dir }}" + else + cd terraform/eks/addon/gpu + fi + echo "Getting EKS cluster name" + EKS_CLUSTER_NAME=$(terraform output -raw eks_cluster_name) + echo "Cluster name is ${EKS_CLUSTER_NAME}" + + if go test ${{ matrix.arrays.test_dir }} -eksClusterName ${EKS_CLUSTER_NAME} -computeType=EKS -v -eksDeploymentStrategy=DAEMON -eksGpuType=nvidia -useE2EMetrics; then + echo "Tests passed" + else + echo "Tests failed" + exit 1 + fi + + - name: Terraform destroy + if: always() + uses: nick-fields/retry@v2 + with: + max_attempts: 3 + timeout_minutes: 8 + retry_wait_seconds: 5 + command: | + if [ "${{ matrix.arrays.terraform_dir }}" != "" ]; then + cd "${{ matrix.arrays.terraform_dir }}" + else + cd terraform/eks/addon/gpu + fi + terraform destroy --auto-approve \ No newline at end of file diff --git a/.github/workflows/test-build-packages.yml b/.github/workflows/test-build-packages.yml index 12673c19d3..c2e4e3c1b9 100644 --- a/.github/workflows/test-build-packages.yml +++ b/.github/workflows/test-build-packages.yml @@ -62,7 +62,7 @@ on: jobs: MakeMacPkg: name: 
'MakeMacPkg' - runs-on: macos-11 + runs-on: macos-12 permissions: id-token: write contents: read diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 9e5cea56a9..c1a07743fa 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,11 +1,27 @@ +======================================================================== +Amazon CloudWatch Agent 1.300041.0 (2024-06-07) +======================================================================== + +Features: +* Support JMX metric collection for JVM, Tomcat, and Kafka on EC2 + +Enhancements: +* [ContainerInsights] Add NVIDIA GPU count metrics +* [Application Signals] Enable detailed logging for metrics/traces when debug is enabled + +Bug fixes: +* [Logs/Windows Event] Fix load state offset parsing to support unsigned int +* [ContainerInsights] Prevent non-active workload GPU pod metrics from being emitted + ======================================================================== Amazon CloudWatch Agent 1.300040.0 (2024-05-21) ======================================================================== -Enhancements -* [Application Signals] Export emf logs to /aws/application-signals/data + +Enhancements: +* [Application Signals] Export EMF logs to /aws/application-signals/data * [Application Signals] Rename metric namespace to Application Signals * [Application Signals] Change metric schema from HostedIn to Environment -* Trim AWS prefix for spans in xray exporter +* Trim AWS prefix for spans in X-Ray exporter Bug fixes: * Fix panic when using amazon-cloudwatch-agent-ctl -a cond-restart @@ -13,21 +29,24 @@ Bug fixes: ======================================================================== Amazon CloudWatch Agent 1.300039.0 (2024-05-03) ======================================================================== + Features: * [Metrics] Append Dimension Volume Id For Metrics -Enhancements +Enhancements: * Upgrade OTEL Contrib To v0.98.0 ======================================================================== Amazon CloudWatch Agent 1.300037.1 
(2024-04-26) ======================================================================== + Bug fixes: * Fix nil referencing issue while decorating container insights metrics ======================================================================== Amazon CloudWatch Agent 1.300037.0 (2024-04-11) ======================================================================== + Features: * [ContainerInsights] Add Elastic Fabric Adapter (EFA) observability with Kubernetes @@ -41,18 +60,21 @@ Bug fixes: ======================================================================== Amazon CloudWatch Agent 1.300036.0 (2024-04-05) ======================================================================== + Features: * [ContainerInsights] Add AWS Trainium & Inferentia observability with Kubernetes ======================================================================== Amazon CloudWatch Agent 1.300035.0 (2024-03-18) ======================================================================== + Features: * [ContainerInsights] Add Container Insights for Windows in kubernetes. 
======================================================================== Amazon CloudWatch Agent 1.300034.1 (2024-03-14) ======================================================================== + Bug fixes: * [AppSignals] Explicitly set the default GC interval for metrics limiter @@ -62,6 +84,7 @@ Enhancements: ======================================================================== Amazon CloudWatch Agent 1.300034.0 (2024-03-04) ======================================================================== + Features: * [ContainerInsights] Add NVIDIA GPU observability with Kubernetes diff --git a/plugins/processors/awsapplicationsignals/internal/normalizer/attributesnormalizer.go b/plugins/processors/awsapplicationsignals/internal/normalizer/attributesnormalizer.go index b0966bca2c..114ec8542b 100644 --- a/plugins/processors/awsapplicationsignals/internal/normalizer/attributesnormalizer.go +++ b/plugins/processors/awsapplicationsignals/internal/normalizer/attributesnormalizer.go @@ -70,6 +70,7 @@ var copyMapForMetric = map[string]string{ semconv.AttributeK8SJobName: "K8s.Workload", semconv.AttributeK8SCronJobName: "K8s.Workload", semconv.AttributeK8SPodName: "K8s.Pod", + semconv.AttributeAWSLogGroupNames: "aws.log.group.names", } const ( diff --git a/translator/config/sampleSchema/validEthtoolConfig.json b/translator/config/sampleSchema/validEthtoolConfig.json index 29d88d7cb4..e9156aa7d2 100644 --- a/translator/config/sampleSchema/validEthtoolConfig.json +++ b/translator/config/sampleSchema/validEthtoolConfig.json @@ -1,27 +1,30 @@ { - "metrics": { - "metrics_collected": { - "ethtool": { - "interface_include": [ - "eth0", - "eth1" - ], - "metrics_include": [ - "bw_in_allowance_exceeded", - "bw_out_allowance_exceeded", - "pps_allowance_exceeded", - "conntrack_allowance_exceeded", - "linklocal_allowance_exceeded" - ] + "metrics": { + "metrics_collected": { + "ethtool": { + "interface_include": [ + "eth0", + "eth1" + ], + "metrics_include": [ + "bw_in_allowance_exceeded", 
+ "bw_out_allowance_exceeded", + "pps_allowance_exceeded", + "conntrack_allowance_exceeded", + "linklocal_allowance_exceeded" + ], + "append_dimensions": { + "name": "sampleName" } - }, - "append_dimensions": { - "ImageId": "${aws:ImageId}", - "InstanceId": "${aws:InstanceId}", - "InstanceType": "${aws:InstanceType}", - "AutoScalingGroupName": "${aws:AutoScalingGroupName}" - }, - "aggregation_dimensions" : [["ImageId"], ["InstanceId", "InstanceType"], ["d1"],[]], - "force_flush_interval": 60 - } - } \ No newline at end of file + } + }, + "append_dimensions": { + "ImageId": "${aws:ImageId}", + "InstanceId": "${aws:InstanceId}", + "InstanceType": "${aws:InstanceType}", + "AutoScalingGroupName": "${aws:AutoScalingGroupName}" + }, + "aggregation_dimensions" : [["ImageId"], ["InstanceId", "InstanceType"], ["d1"],[]], + "force_flush_interval": 60 + } +} \ No newline at end of file diff --git a/translator/config/schema.json b/translator/config/schema.json index 76f3845470..68505ed585 100644 --- a/translator/config/schema.json +++ b/translator/config/schema.json @@ -468,6 +468,9 @@ "minLength": 1, "maxLength": 255 } + }, + "append_dimensions": { + "$ref": "#/definitions/generalAppendDimensionsDefinition" } }, "additionalProperties": false diff --git a/translator/translate/metrics/metrics_collect/ethtool/ethtool.go b/translator/translate/metrics/metrics_collect/ethtool/ethtool.go index 3de161e340..f0ca82c443 100644 --- a/translator/translate/metrics/metrics_collect/ethtool/ethtool.go +++ b/translator/translate/metrics/metrics_collect/ethtool/ethtool.go @@ -6,6 +6,7 @@ package ethtool import ( "github.com/aws/amazon-cloudwatch-agent/translator" parent "github.com/aws/amazon-cloudwatch-agent/translator/translate/metrics/metrics_collect" + "github.com/aws/amazon-cloudwatch-agent/translator/translate/metrics/util" ) var ChildRule = map[string]translator.Rule{} @@ -16,14 +17,14 @@ var ChildRule = map[string]translator.Rule{} // "metrics_include": [ // "bw_in_allowance_exceeded", 
// "bw_out_allowance_exceeded" -// ] -// } -const SectionKey_Ethtool = "ethtool" +// ], +// "append_dimensions":{ +// key:value +// } +// +// } -func GetCurPath() string { - curPath := parent.GetCurPath() + SectionKey_Ethtool + "/" - return curPath -} +const SectionKey_Ethtool = "ethtool" func RegisterRule(fieldname string, r translator.Rule) { ChildRule[fieldname] = r @@ -49,7 +50,10 @@ func (n *Ethtool) ApplyRule(input interface{}) (returnKey string, returnVal inte resArr = append(resArr, result) returnKey = SectionKey_Ethtool returnVal = resArr + //Process tags + util.ProcessAppendDimensions(m[SectionKey_Ethtool].(map[string]interface{}), SectionKey_Ethtool, result) } + return } diff --git a/translator/translate/metrics/metrics_collect/ethtool/ethtool_test.go b/translator/translate/metrics/metrics_collect/ethtool/ethtool_test.go index 42cc6eb769..d2f11bf513 100644 --- a/translator/translate/metrics/metrics_collect/ethtool/ethtool_test.go +++ b/translator/translate/metrics/metrics_collect/ethtool/ethtool_test.go @@ -37,7 +37,10 @@ func TestFullConfig(t *testing.T) { ], "metrics_include": [ "bw_in_allowance_exceeded" - ] + ], + "append_dimensions":{ + "name":"sampleName" + } }}`), &input) assert.NoError(t, err) _, actual := d.ApplyRule(input) @@ -46,6 +49,7 @@ func TestFullConfig(t *testing.T) { "interface_include": []string{"eth0"}, "interface_exclude": []string{"eth1"}, "fieldpass": []string{"bw_in_allowance_exceeded"}, + "tags": map[string]interface{}{"name": "sampleName"}, }, } diff --git a/translator/translate/metrics/util/commonconfigutil.go b/translator/translate/metrics/util/commonconfigutil.go index 770aabfb8d..855653657e 100755 --- a/translator/translate/metrics/util/commonconfigutil.go +++ b/translator/translate/metrics/util/commonconfigutil.go @@ -29,7 +29,6 @@ const ( // ProcessLinuxCommonConfig is used by both Linux and Darwin. 
func ProcessLinuxCommonConfig(input interface{}, pluginName string, path string, result map[string]interface{}) bool { - isHighResolution := IsHighResolution(agent.Global_Config.Interval) inputMap := input.(map[string]interface{}) // Generate allowlisted metric list, process only if Measurement_Key exist if translator.IsValid(inputMap, Measurement_Key, path) { @@ -49,9 +48,21 @@ func ProcessLinuxCommonConfig(input interface{}, pluginName string, path string, return false } - // Set input plugin specific interval - isHighResolution = setTimeInterval(inputMap, result, isHighResolution, pluginName) + ProcessAppendDimensions(inputMap, pluginName, result) + isHighResolution := IsHighResolution(agent.Global_Config.Interval) + isHighResolution = setTimeInterval(inputMap, result, isHighResolution, pluginName) + // Add HighResolution tags + if isHighResolution { + if result[Append_Dimensions_Mapped_Key] != nil { + util.AddHighResolutionTag(result[Append_Dimensions_Mapped_Key]) + } else { + result[Append_Dimensions_Mapped_Key] = map[string]interface{}{util.High_Resolution_Tag_Key: "true"} + } + } + return true +} +func ProcessAppendDimensions(inputMap map[string]interface{}, pluginName string, result map[string]interface{}) { // Set append_dimensions as tags if val, ok := inputMap[Append_Dimensions_Key]; ok { result[Append_Dimensions_Mapped_Key] = util.FilterReservedKeys(val) @@ -63,16 +74,6 @@ func ProcessLinuxCommonConfig(input interface{}, pluginName string, path string, result[key] = val } } - - // Add HighResolution tags - if isHighResolution { - if result[Append_Dimensions_Mapped_Key] != nil { - util.AddHighResolutionTag(result[Append_Dimensions_Mapped_Key]) - } else { - result[Append_Dimensions_Mapped_Key] = map[string]interface{}{util.High_Resolution_Tag_Key: "true"} - } - } - return true } // Windows common config returnVal would be three parts: