Skip to content

Commit

Permalink
making sure to separate integ test metrics from e2e metrics (prev com…
Browse files Browse the repository at this point in the history
…mit works)
  • Loading branch information
Paramadon committed Jun 13, 2024
1 parent 63de98d commit 82c2b9e
Showing 1 changed file with 49 additions and 1 deletion.
50 changes: 49 additions & 1 deletion test/gpu/nvidia_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package emf

import (
"flag"
"time"

"github.com/aws/amazon-cloudwatch-agent-test/environment"
Expand All @@ -14,6 +15,8 @@ import (
"github.com/aws/amazon-cloudwatch-agent-test/test/test_runner"
)

// useE2EMetrics is a boolean command-line flag that defaults to false.
// When it is set, Validate checks metrics against expectedDimsToMetricsE2E
// instead of expectedDimsToMetricsIntegTest.
var useE2EMetrics = flag.Bool("useE2EMetrics", false, "Use E2E metrics mapping which uses latest build CWA")

const (
gpuMetricIndicator = "_gpu_"

Expand Down Expand Up @@ -46,7 +49,48 @@ const (
clusterCountRequest = "cluster_gpu_request"
)

var expectedDimsToMetrics = map[string][]string{
// expectedDimsToMetricsIntegTest is the expectation table Validate passes to
// metric.ValidateMetrics when the useE2EMetrics flag is not set.
//
// Each key is a set of dimension names joined with "-"; each value lists the
// GPU metric names expected for that dimension set.
var expectedDimsToMetricsIntegTest = map[string][]string{
"ClusterName": {
containerMemTotal, containerMemUsed, containerPower, containerTemp, containerUtil, containerMemUtil,
podMemTotal, podMemUsed, podPower, podTemp, podUtil, podMemUtil,
nodeMemTotal, nodeMemUsed, nodePower, nodeTemp, nodeUtil, nodeMemUtil,
// NOTE(review): the node/cluster count metrics are deliberately left out of
// this table; the reason is not visible in this file — confirm before re-enabling.
//nodeCountTotal, nodeCountRequest, nodeCountLimit,
//clusterCountTotal, clusterCountRequest,
},
"ClusterName-Namespace": {
podMemTotal, podMemUsed, podPower, podTemp, podUtil, podMemUtil,
},
// NOTE(review): the Service-level dimension set is disabled in this table;
// the reason is not visible in this file — confirm before re-enabling.
//"ClusterName-Namespace-Service": {
// podMemTotal, podMemUsed, podPower, podTemp, podUtil, podMemUtil,
//},
"ClusterName-Namespace-PodName": {
podMemTotal, podMemUsed, podPower, podTemp, podUtil, podMemUtil,
},
"ClusterName-ContainerName-Namespace-PodName": {
containerMemTotal, containerMemUsed, containerPower, containerTemp, containerUtil, containerMemUtil,
},
"ClusterName-ContainerName-FullPodName-Namespace-PodName": {
containerMemTotal, containerMemUsed, containerPower, containerTemp, containerUtil, containerMemUtil,
},
"ClusterName-ContainerName-FullPodName-GpuDevice-Namespace-PodName": {
containerMemTotal, containerMemUsed, containerPower, containerTemp, containerUtil, containerMemUtil,
},
"ClusterName-FullPodName-Namespace-PodName": {
podMemTotal, podMemUsed, podPower, podTemp, podUtil, podMemUtil,
},
"ClusterName-FullPodName-GpuDevice-Namespace-PodName": {
podMemTotal, podMemUsed, podPower, podTemp, podUtil, podMemUtil,
},
"ClusterName-InstanceId-NodeName": {
nodeMemTotal, nodeMemUsed, nodePower, nodeTemp, nodeUtil, nodeMemUtil,
// NOTE(review): node count metrics are disabled for this dimension set as
// well; reason not visible here.
//nodeCountTotal, nodeCountRequest, nodeCountLimit,
},
"ClusterName-GpuDevice-InstanceId-InstanceType-NodeName": {
nodeMemTotal, nodeMemUsed, nodePower, nodeTemp, nodeUtil, nodeMemUtil,
},
}

var expectedDimsToMetricsE2E = map[string][]string{
"ClusterName": {
containerMemTotal, containerMemUsed, containerPower, containerTemp, containerUtil, containerMemUtil,
podMemTotal, podMemUsed, podPower, podTemp, podUtil, podMemUtil,
Expand Down Expand Up @@ -95,6 +139,10 @@ var _ test_runner.ITestRunner = (*NvidiaTestRunner)(nil)

func (t *NvidiaTestRunner) Validate() status.TestGroupResult {
var testResults []status.TestResult
expectedDimsToMetrics := expectedDimsToMetricsIntegTest
if *useE2EMetrics {
expectedDimsToMetrics = expectedDimsToMetricsE2E
}
testResults = append(testResults, metric.ValidateMetrics(t.env, gpuMetricIndicator, expectedDimsToMetrics)...)
testResults = append(testResults, metric.ValidateLogs(t.env))
return status.TestGroupResult{
Expand Down

0 comments on commit 82c2b9e

Please sign in to comment.