Skip to content

Commit

Permalink
Integration Test - Implement the validation for metric value availabi…
Browse files Browse the repository at this point in the history
…lity and duplicate EMF logs (aws#394)
  • Loading branch information
Paramadon authored Mar 1, 2024
1 parent 12587c4 commit 2e403f6
Show file tree
Hide file tree
Showing 5 changed files with 390 additions and 66 deletions.
40 changes: 38 additions & 2 deletions test/metric/metric_list_query.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,54 @@ func (n *MetricListFetcher) Fetch(namespace, metricName string, dimensions []typ

listMetricInput := cloudwatch.ListMetricsInput{
Namespace: aws.String(namespace),
MetricName: aws.String(metricName),
Dimensions: dims,
}
if len(metricName) > 0 {
listMetricInput.MetricName = aws.String(metricName)
}

log.Printf("Metric data input: namespace %v, name %v", namespace, metricName)
var metrics []types.Metric
for {
// get a complete list of metrics with given dimensions
output, err := awsservice.CwmClient.ListMetrics(context.Background(), &listMetricInput)
if err != nil {
return nil, fmt.Errorf("Error getting metric data %v", err)
}
metrics = append(metrics, output.Metrics...)
// nil or empty nextToken means there is no more data to be fetched
nextToken := output.NextToken
if nextToken == nil || *nextToken == "" {
break
}
listMetricInput.NextToken = nextToken
}
log.Printf("total number of metrics fetched: %v", len(metrics))
return metrics, nil
}

func (n *MetricListFetcher) FetchByDimension(namespace string, dimensions []types.Dimension) ([]types.Metric, error) {
var dims []types.DimensionFilter
for _, dim := range dimensions {
dims = append(dims, types.DimensionFilter{
Name: dim.Name,
Value: dim.Value,
})
}

listMetricInput := cloudwatch.ListMetricsInput{
Namespace: aws.String(namespace),
Dimensions: dims,
}

log.Printf("Metric data input: namespace %v, dimensions %v", namespace, fmt.Sprint(&dims))

output, err := awsservice.CwmClient.ListMetrics(context.Background(), &listMetricInput)
if err != nil {
return nil, fmt.Errorf("Error getting metric data %v", err)
}

log.Printf("Metrics fetched : %s", fmt.Sprint(output))
log.Printf("Metrics fetched : %v", output.Metrics)

return output.Metrics, nil
}
2 changes: 2 additions & 0 deletions test/metric/stat.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,6 @@ const (
MAXUMUM Statistics = "Maxmimum"
SUM Statistics = "Sum"
HighResolutionStatPeriod = 10

MinuteStatPeriod = 60
)
179 changes: 133 additions & 46 deletions test/metric_value_benchmark/eks_daemonset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,92 +10,178 @@ import (
"errors"
"fmt"
"log"
"math/rand"
"sort"
"strings"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
"golang.org/x/exp/slices"

"github.com/aws/amazon-cloudwatch-agent-test/environment"
"github.com/aws/amazon-cloudwatch-agent-test/test/metric"
"github.com/aws/amazon-cloudwatch-agent-test/test/metric/dimension"
"github.com/aws/amazon-cloudwatch-agent-test/test/metric_value_benchmark/eks_resources"
"github.com/aws/amazon-cloudwatch-agent-test/test/status"
"github.com/aws/amazon-cloudwatch-agent-test/test/test_runner"
"github.com/aws/amazon-cloudwatch-agent-test/util/awsservice"
)

const containerInsightsNamespace = "ContainerInsights"

// list of metrics with more dimensions e.g. PodName and Namespace
var metricsWithMoreDimensions = []string{"pod_number_of_container_restarts"}
const gpuMetricIndicator = "_gpu_"

type EKSDaemonTestRunner struct {
test_runner.BaseTestRunner
env *environment.MetaData
testName string
env *environment.MetaData
}

func (e *EKSDaemonTestRunner) Validate() status.TestGroupResult {
metrics := e.GetMeasuredMetrics()
testResults := make([]status.TestResult, 0)
for _, name := range metrics {
testResults = append(testResults, e.validateInstanceMetrics(name))
}

var testResults []status.TestResult
testResults = append(testResults, validateMetrics(e.env, gpuMetricIndicator, eks_resources.ExpectedDimsToMetrics)...)
testResults = append(testResults, e.validateLogs(e.env))
return status.TestGroupResult{
Name: e.GetTestName(),
TestResults: testResults,
}
}

func (e *EKSDaemonTestRunner) validateInstanceMetrics(name string) status.TestResult {
testResult := status.TestResult{
Name: name,
Status: status.FAILED,
const (
dimDelimiter = "-"
ContainerInsightsNamespace = "ContainerInsights"
)

type dimToMetrics struct {
// dim keys as string with dimDelimiter(-) eg. ClusterName-Namespace
dimStr string
// metric names to their dimensions with values. Dimension sets will be used for metric data validations
metrics map[string][][]types.Dimension
}

func validateMetrics(env *environment.MetaData, metricFilter string, expectedDimsToMetrics map[string][]string) []status.TestResult {
var results []status.TestResult
dimsToMetrics := getMetricsInClusterDimension(env, metricFilter)
//loops through each dimension set and checks if they exit in the cluster(fails if it doesn't)
for dims, metrics := range expectedDimsToMetrics {
var actual map[string][][]types.Dimension
//looping through dtms until we find the dimension string equal to the one in the hard coded map
for _, dtm := range dimsToMetrics {
log.Printf("dtm: %s vs dims %s", dtm.dimStr, dims) //testing purposes
if dtm.dimStr == dims {
actual = dtm.metrics
break
}
}
//if there are no metrics for the dimension set, we fail the test
if len(actual) < 1 {
results = append(results, status.TestResult{
Name: dims,
Status: status.FAILED,
})
log.Printf("ValidateMetrics failed with missing dimension set: %s", dims)
// keep testing other dims or fail early?
continue
}
//verifies length of metrics for dimension set
results = append(results, validateMetricsAvailability(dims, metrics, actual))
for _, m := range metrics {
// picking a random dimension set to test metric data so we don't have to test every dimension set
randIdx := rand.Intn(len(actual[m]))
//verifys values of metrics
results = append(results, validateMetricValue(m, actual[m][randIdx]))
}
}
return results
}

dims, failed := e.DimensionFactory.GetDimensions([]dimension.Instruction{
// Fetches all metrics in cluster
func getMetricsInClusterDimension(env *environment.MetaData, metricFilter string) []dimToMetrics { //map[string]map[string]interface{} {
listFetcher := metric.MetricListFetcher{}
log.Printf("Fetching by cluster dimension")
dims := []types.Dimension{
{
Key: "ClusterName",
Value: dimension.UnknownDimensionValue(),
Name: aws.String("ClusterName"),
Value: aws.String(env.EKSClusterName),
},
})
if len(failed) > 0 {
log.Println("failed to get dimensions")
return testResult
}
metrics, err := listFetcher.Fetch(ContainerInsightsNamespace, "", dims)
if err != nil {
log.Println("failed to fetch metric list", err)
return nil
}
if len(metrics) < 1 {
log.Println("cloudwatch metric list is empty")
return nil
}

// get list of metrics that has more dimensions for container insights
// this is to avoid adding more dimension provider for non-trivial dimensions e.g. PodName
listFetcher := metric.MetricListFetcher{}
if slices.Contains(metricsWithMoreDimensions, name) {
metrics, err := listFetcher.Fetch(containerInsightsNamespace, name, dims)
if err != nil {
log.Println("failed to fetch metric list", err)
return testResult
var results []dimToMetrics
for _, m := range metrics {
// filter by metric name filter(skip gpu validation)
if metricFilter != "" && strings.Contains(*m.MetricName, metricFilter) {
continue
}

if len(metrics) < 1 {
log.Println("metric list is empty")
return testResult
var dims []string
for _, d := range m.Dimensions {
dims = append(dims, *d.Name)
}

// just verify 1 of returned metrics for values
for _, dim := range metrics[0].Dimensions {
// skip since it's provided by dimension provider
if *dim.Name == "ClusterName" {
continue
sort.Sort(sort.StringSlice(dims)) //what's the point of sorting?
dimsKey := strings.Join(dims, dimDelimiter)
log.Printf("processing dims: %s", dimsKey)

var dtm dimToMetrics
for _, ele := range results {
if ele.dimStr == dimsKey {
dtm = ele
break
}
}
if dtm.dimStr == "" {
dtm = dimToMetrics{
dimStr: dimsKey,
metrics: make(map[string][][]types.Dimension),
}
results = append(results, dtm)
}
dtm.metrics[*m.MetricName] = append(dtm.metrics[*m.MetricName], m.Dimensions)
}
return results
}

dims = append(dims, types.Dimension{
Name: dim.Name,
Value: dim.Value,
})
// Check if all metrics from cluster matches hard coded map
func validateMetricsAvailability(dims string, expected []string, actual map[string][][]types.Dimension) status.TestResult {
testResult := status.TestResult{
Name: dims,
Status: status.FAILED,
}
log.Printf("expected metrics: %d, actual metrics: %d", len(expected), len(actual))
if compareMetrics(expected, actual) {
testResult.Status = status.SUCCESSFUL
} else {
log.Printf("validateMetricsAvailability failed for %s", dims)
}
return testResult
}

func compareMetrics(expected []string, actual map[string][][]types.Dimension) bool {
if len(expected) != len(actual) {
return false
}

for _, key := range expected {
if _, ok := actual[key]; !ok {
return false
}
}
return true
}

func validateMetricValue(name string, dims []types.Dimension) status.TestResult {
log.Printf("validateMetricValue with metric: %s", name)
testResult := status.TestResult{
Name: name,
Status: status.FAILED,
}
valueFetcher := metric.MetricValueFetcher{}
values, err := valueFetcher.Fetch(containerInsightsNamespace, name, dims, metric.AVERAGE, metric.HighResolutionStatPeriod)
values, err := valueFetcher.Fetch(containerInsightsNamespace, name, dims, metric.SAMPLE_COUNT, metric.MinuteStatPeriod)
if err != nil {
log.Println("failed to fetch metrics", err)
return testResult
Expand Down Expand Up @@ -133,6 +219,7 @@ func (e *EKSDaemonTestRunner) validateLogs(env *environment.MetaData) status.Tes
nil,
&now,
awsservice.AssertLogsNotEmpty(),
awsservice.AssertNoDuplicateLogs(),
awsservice.AssertPerLog(
awsservice.AssertLogSchema(func(message string) (string, error) {
var eksClusterType awsservice.EKSClusterType
Expand Down
Loading

0 comments on commit 2e403f6

Please sign in to comment.