diff --git a/environment/metadata.go b/environment/metadata.go index 7a0e1e965..d40b9fa8c 100644 --- a/environment/metadata.go +++ b/environment/metadata.go @@ -44,6 +44,7 @@ type MetaData struct { InstancePlatform string AgentStartCommand string EksGpuType string + AmpWorkspaceId string } type MetaDataStrings struct { @@ -67,6 +68,7 @@ type MetaDataStrings struct { InstancePlatform string AgentStartCommand string EksGpuType string + AmpWorkspaceId string } func registerComputeType(dataString *MetaDataStrings) { @@ -215,6 +217,11 @@ func fillEKSData(e *MetaData, data *MetaDataStrings) { e.EKSClusterName = data.EKSClusterName e.EksGpuType = data.EksGpuType } + +func registerAmpWorkspaceId(dataString *MetaDataStrings) { + flag.StringVar(&(dataString.AmpWorkspaceId), "ampWorkspaceId", "", "workspace Id for Amazon Managed Prometheus (AMP)") +} + func RegisterEnvironmentMetaDataFlags() *MetaDataStrings { registerComputeType(registeredMetaDataStrings) registerECSData(registeredMetaDataStrings) @@ -230,6 +237,7 @@ func RegisterEnvironmentMetaDataFlags() *MetaDataStrings { registerInstanceId(registeredMetaDataStrings) registerInstancePlatform(registeredMetaDataStrings) registerAgentStartCommand(registeredMetaDataStrings) + registerAmpWorkspaceId(registeredMetaDataStrings) return registeredMetaDataStrings } @@ -255,6 +263,7 @@ func GetEnvironmentMetaData() *MetaData { metaDataStorage.InstancePlatform = registeredMetaDataStrings.InstancePlatform metaDataStorage.AgentStartCommand = registeredMetaDataStrings.AgentStartCommand metaDataStorage.EksGpuType = registeredMetaDataStrings.EksGpuType + metaDataStorage.AmpWorkspaceId = registeredMetaDataStrings.AmpWorkspaceId return metaDataStorage } diff --git a/generator/test_case_generator.go b/generator/test_case_generator.go index a9fee398f..f7f042267 100644 --- a/generator/test_case_generator.go +++ b/generator/test_case_generator.go @@ -110,6 +110,10 @@ var testTypeToTestConfig = map[string][]testConfig{ terraformDir: "terraform/ec2/creds", targets: map[string]map[string]struct{}{"os": {"al2": {}}}, }, + { + testDir: "./test/amp", + targets: map[string]map[string]struct{}{"os": {"al2": {}}, "arc": {"amd64": {}}}, + }, }, /* You can only place 1 mac instance on a dedicate host a single time. diff --git a/terraform/ec2/common/linux/output.tf b/terraform/ec2/common/linux/output.tf index 72a53db94..1487d50b9 100644 --- a/terraform/ec2/common/linux/output.tf +++ b/terraform/ec2/common/linux/output.tf @@ -19,4 +19,8 @@ output "proxy_instance_proxy_ip" { output "cwa_onprem_assumed_iam_role_arm" { value = module.common.cwa_onprem_assumed_iam_role_arm +} + +output "testing_id" { + value = module.common.testing_id } \ No newline at end of file diff --git a/terraform/ec2/linux/main.tf b/terraform/ec2/linux/main.tf index 1557dbf44..068cd0454 100644 --- a/terraform/ec2/linux/main.tf +++ b/terraform/ec2/linux/main.tf @@ -68,6 +68,12 @@ resource "null_resource" "integration_test_setup" { ] } +module "amp" { + count = length(regexall("/amp", var.test_dir)) > 0 ? 1 : 0 + source = "terraform-aws-modules/managed-service-prometheus/aws" + workspace_alias = "cwagent-integ-test-${module.linux_common.testing_id}" +} + resource "null_resource" "integration_test_run" { connection { type = "ssh" @@ -87,7 +93,7 @@ resource "null_resource" "integration_test_run" { "cd ~/amazon-cloudwatch-agent-test", "echo run sanity test && go test ./test/sanity -p 1 -v", var.pre_test_setup, - "go test ${var.test_dir} -p 1 -timeout 1h -computeType=EC2 -bucket=${var.s3_bucket} -plugins='${var.plugin_tests}' -excludedTests='${var.excluded_tests}' -cwaCommitSha=${var.cwa_github_sha} -caCertPath=${var.ca_cert_path} -proxyUrl=${module.linux_common.proxy_instance_proxy_ip} -instanceId=${module.linux_common.cwagent_id} -v", + "go test ${var.test_dir} -p 1 -timeout 1h -computeType=EC2 -bucket=${var.s3_bucket} -plugins='${var.plugin_tests}' -excludedTests='${var.excluded_tests}' -cwaCommitSha=${var.cwa_github_sha} -caCertPath=${var.ca_cert_path} -proxyUrl=${module.linux_common.proxy_instance_proxy_ip} -instanceId=${module.linux_common.cwagent_id} ${length(regexall("/amp", var.test_dir)) > 0 ? "-ampWorkspaceId=${module.amp[0].workspace_id} " : ""}-v", ] } diff --git a/test/amp/agent_configs/config.json b/test/amp/agent_configs/config.json new file mode 100644 index 000000000..46e25594e --- /dev/null +++ b/test/amp/agent_configs/config.json @@ -0,0 +1,65 @@ +{ + "agent": { + "metrics_collection_interval": 15, + "run_as_user": "root", + "debug": true, + "logfile": "" + }, + "metrics": { + "metrics_destinations": { + "amp": { + "workspace_id": "{workspace_id}" + }, + "cloudwatch": {} + }, + "metrics_collected": { + "cpu": { + "resources": [ + "*" + ], + "measurement": [ + { + "name": "cpu_usage_idle", + "rename": "CPU_USAGE_IDLE", + "unit": "unit" + }, + { + "name": "cpu_usage_nice", + "unit": "unit" + }, + "cpu_usage_guest", + "time_active", + "usage_active" + ], + "totalcpu": false, + "metrics_collection_interval": 10, + "append_dimensions": { + "d1": "foo", + "d2": "bar" + } + }, + "processes": { + "measurement": [ + "blocked","running","sleeping","stopped","total","dead","idle","paging","total_threads","zombies" + ], + "metrics_collection_interval": 10, + "append_dimensions": { + "d1": "foo", + "d2": "bar" + } + } + }, + "append_dimensions": { + "ImageId": "${aws:ImageId}", + "InstanceId": "${aws:InstanceId}", + "InstanceType": "${aws:InstanceType}", + "AutoScalingGroupName": "${aws:AutoScalingGroupName}" + }, + "aggregation_dimensions": [ + [ + "InstanceId", + "InstanceType" + ] + ] + } +} \ No newline at end of file diff --git a/test/amp/amp_test.go b/test/amp/amp_test.go new file mode 100644 index 000000000..dcae800cb --- /dev/null +++ b/test/amp/amp_test.go @@ -0,0 +1,271 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +//go:build !windows + +package amp + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "net/http" + "path/filepath" + "strconv" + "testing" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + sigv4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" + + "github.com/aws/amazon-cloudwatch-agent-test/environment" + "github.com/aws/amazon-cloudwatch-agent-test/test/metric" + "github.com/aws/amazon-cloudwatch-agent-test/test/metric/dimension" + "github.com/aws/amazon-cloudwatch-agent-test/test/status" + "github.com/aws/amazon-cloudwatch-agent-test/test/test_runner" + "github.com/aws/amazon-cloudwatch-agent-test/util/common" +) + +type AMPResponse struct { + Status string + Data AMPResponseData +} +type AMPResponseData struct { + ResultType string + Result []AMPDataResult +} +type AMPDataResult struct { + Metric map[string]interface{} + Value []interface{} +} + +const ( + namespace = "AMPDestinationTest" + // template prometheus query for getting average of 3 min + ampQueryTemplate = "avg_over_time(%s%s[3m])" +) + +// NOTE: this should match with append_dimensions under metrics in agent config +var append_dims = map[string]string{ + "d1": "foo", + "d2": "bar", +} + +var awsConfig aws.Config +var awsCreds aws.Credentials + +func init() { + environment.RegisterEnvironmentMetaDataFlags() + + ctx := context.Background() + var err error + awsConfig, err = config.LoadDefaultConfig(ctx, config.WithRegion("us-west-2")) + if err != nil { + fmt.Println("There was an error trying to load default config: ", err) + } + awsCreds, err = awsConfig.Credentials.Retrieve(ctx) + if err != nil { + fmt.Println("There was an error trying to load credentials: ", err) + } +} + +type AmpDestinationTestRunner struct { + test_runner.BaseTestRunner +} + +func (t AmpDestinationTestRunner) Validate() status.TestGroupResult { + metricsToFetch := t.GetMeasuredMetrics() + testResults := make([]status.TestResult, len(metricsToFetch)) + time.Sleep(30 * time.Second) + for i, metricName := range metricsToFetch { + testResults[i] = t.validateMetric(metricName) + } + + return status.TestGroupResult{ + Name: t.GetTestName(), + TestResults: testResults, + } +} + +func (t *AmpDestinationTestRunner) validateMetric(metricName string) status.TestResult { + env := environment.GetEnvironmentMetaData() + + testResult := status.TestResult{ + Name: metricName, + Status: status.FAILED, + } + + // NOTE: dims must match aggregation_dimensions from agent config to fetch metrics. + // the idea is to fetch all metrics including non-aggregated metrics with matching dim set + // then validate if the returned list of metrics include metrics (non-aggregated) with append_dimensions as labels + dims := getDimensions() + if len(dims) == 0 { + return testResult + } + + res, err := queryAMPMetrics(env.AmpWorkspaceId, buildPrometheusQuery(metricName, dims)) + if err != nil { + fmt.Printf("failed to fetch metric values from AMP for %s: %s\n", metricName, err) + return testResult + } + var responseJson AMPResponse + err = json.Unmarshal(res, &responseJson) + if err != nil { + fmt.Printf("failed to unmarshal AMP response: %s\n", err) + return testResult + } + + if len(responseJson.Data.Result) == 0 { + fmt.Printf("AMP metric values are missing for %s\n", metricName) + return testResult + } + + foundAppendDimMetric := true + metricVals := []float64{} + for _, dataResult := range responseJson.Data.Result { + if len(dataResult.Value) < 1 { + continue + } + // metric value is returned as a tuple of timestamp and value (ec. '"value": [1721843955, "26"]') + val, _ := strconv.ParseFloat(dataResult.Value[1].(string), 64) + metricVals = append(metricVals, val) + + // metrics with more labels than fetched dims must be non-aggregated metrics which include append_dimensions as labels + if len(dataResult.Metric) > len(dims) { + foundAppendDimMetric = foundAppendDimMetric && matchDimensions(dataResult.Metric) + } + } + + // at least 2 metrics are expected with 1 set of aggregation_dimensions + // 1 non-aggregated + 1 aggregated minimum + if len(metricVals) < 2 || !foundAppendDimMetric { + fmt.Println("failed with less metric values than expected or missing append_dimensions") + return testResult + } + + if !metric.IsAllValuesGreaterThanOrEqualToExpectedValue(metricName, metricVals, 0) { + return testResult + } + + testResult.Status = status.SUCCESSFUL + return testResult +} + +func (t AmpDestinationTestRunner) GetTestName() string { + return namespace +} + +func (t AmpDestinationTestRunner) GetAgentConfigFileName() string { + return "config.json" +} + +func (t AmpDestinationTestRunner) GetMeasuredMetrics() []string { + return []string{ + "CPU_USAGE_IDLE", "cpu_usage_nice", "cpu_usage_guest", "cpu_time_active", "cpu_usage_active", + "processes_blocked", "processes_dead", "processes_idle", "processes_paging", "processes_running", + "processes_sleeping", "processes_stopped", "processes_total", "processes_total_threads", "processes_zombies", + //"jvm.threads.count", "jvm.memory.heap.used", "jvm.memory.heap.max", "jvm.memory.heap.init", + } +} + +func (t *AmpDestinationTestRunner) SetupBeforeAgentRun() error { + env := environment.GetEnvironmentMetaData() + err := t.BaseTestRunner.SetupBeforeAgentRun() + if err != nil { + return err + } + // replace AMP workspace ID placeholder with a testing workspace ID from metadata + agentConfigPath := filepath.Join("agent_configs", t.GetAgentConfigFileName()) + ampCommands := []string{ + "sed -ie 's/{workspace_id}/" + env.AmpWorkspaceId + "/g' " + agentConfigPath, + // use below to add JMX metrics then update agent config & GetMeasuredMetrics() + //"nohup java -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=2030 -Dcom.sun.management.jmxremote.local.only=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.rmi.port=2030 -Dcom.sun.management.jmxremote.host=0.0.0.0 -Djava.rmi.server.hostname=0.0.0.0 -Dserver.port=8090 -Dspring.application.admin.enabled=true -jar jars/spring-boot-web-starter-tomcat.jar > /tmp/spring-boot-web-starter-tomcat-jar.txt 2>&1 &", + } + err = common.RunCommands(ampCommands) + if err != nil { + return err + } + return t.SetUpConfig() +} + +func TestAmp(t *testing.T) { + runner := test_runner.TestRunner{TestRunner: &AmpDestinationTestRunner{ + test_runner.BaseTestRunner{}, + }} + result := runner.Run() + if result.GetStatus() != status.SUCCESSFUL { + t.Fatal("AMP Destination test failed") + result.Print() + } +} + +func getDimensions() []types.Dimension { + env := environment.GetEnvironmentMetaData() + factory := dimension.GetDimensionFactory(*env) + dims, failed := factory.GetDimensions([]dimension.Instruction{ + { + Key: "InstanceId", + Value: dimension.UnknownDimensionValue(), + }, + { + Key: "InstanceType", + Value: dimension.UnknownDimensionValue(), + }, + }) + + if len(failed) > 0 { + return []types.Dimension{} + } + + return dims +} + +func buildPrometheusQuery(metricName string, dims []types.Dimension) string { + dimsStr := "" + for _, dim := range dims { + dimsStr = fmt.Sprintf("%s%s=\"%s\", ", dimsStr, *dim.Name, *dim.Value) + } + if len(dimsStr) > 0 { + dimsStr = dimsStr[:len(dimsStr)-1] + } + return fmt.Sprintf(ampQueryTemplate, metricName, "{"+dimsStr+"}") +} + +func queryAMPMetrics(wsId string, q string) ([]byte, error) { + url := fmt.Sprintf("https://aps-workspaces.%s.amazonaws.com/workspaces/%s/api/v1/query?query=%s", awsConfig.Region, wsId, q) + req, err := http.NewRequest(http.MethodPost, url, nil) + if err != nil { + return nil, err + } + + signer := sigv4.NewSigner() + err = signer.SignHTTP(context.Background(), awsCreds, req, hex.EncodeToString(sha256.New().Sum(nil)), "aps", awsConfig.Region, time.Now().UTC()) + if err != nil { + return nil, err + } + + res, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + return io.ReadAll(res.Body) +} + +func matchDimensions(labels map[string]interface{}) bool { + if len(append_dims) > len(labels) { + return false + } + for k, v := range append_dims { + if lv, found := labels[k]; !found || lv != v { + return false + } + } + return true +} + +var _ test_runner.ITestRunner = (*AmpDestinationTestRunner)(nil)