Skip to content

Commit

Permalink
Add support for Elastic Fabric Adapter (EFA) metrics. (aws#1117)
Browse files Browse the repository at this point in the history
  • Loading branch information
straussb authored Apr 3, 2024
1 parent 072eac4 commit 5f676d1
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,76 @@ exporters:
- node_neurondevice_hw_ecc_events_total_mem_ecc_uncorrected
- node_neurondevice_hw_ecc_events_total_sram_ecc_corrected
- node_neurondevice_hw_ecc_events_total_sram_ecc_uncorrected
- dimensions:
- - ClusterName
- - ClusterName
- ContainerName
- Namespace
- PodName
- - ClusterName
- ContainerName
- FullPodName
- Namespace
- PodName
- - ClusterName
- ContainerName
- EfaDevice
- FullPodName
- Namespace
- PodName
label_matchers: []
metric_name_selectors:
- container_efa_rx_bytes
- container_efa_tx_bytes
- container_efa_rx_dropped
- container_efa_rdma_read_bytes
- container_efa_rdma_write_bytes
- container_efa_rdma_write_recv_bytes
- dimensions:
- - ClusterName
- - ClusterName
- Namespace
- - ClusterName
- Namespace
- Service
- - ClusterName
- Namespace
- PodName
- - ClusterName
- FullPodName
- Namespace
- PodName
- - ClusterName
- EfaDevice
- FullPodName
- Namespace
- PodName
label_matchers: []
metric_name_selectors:
- pod_efa_rx_bytes
- pod_efa_tx_bytes
- pod_efa_rx_dropped
- pod_efa_rdma_read_bytes
- pod_efa_rdma_write_bytes
- pod_efa_rdma_write_recv_bytes
- dimensions:
- - ClusterName
- - ClusterName
- InstanceId
- NodeName
- - ClusterName
- EfaDevice
- InstanceId
- InstanceType
- NodeName
label_matchers: []
metric_name_selectors:
- node_efa_rx_bytes
- node_efa_tx_bytes
- node_efa_rx_dropped
- node_efa_rdma_read_bytes
- node_efa_rdma_write_bytes
- node_efa_rdma_write_recv_bytes
metric_descriptors:
- metric_name: apiserver_admission_controller_admission_duration_seconds
overwrite: true
Expand Down
59 changes: 59 additions & 0 deletions translator/translate/otel/exporter/awsemf/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ func setKubernetesMetricDeclaration(conf *confmap.Conf, cfg *awsemfexporter.Conf
// Setup Aws Neuron metrics
kubernetesMetricDeclarations = append(kubernetesMetricDeclarations, getAwsNeuronMetricDeclarations(conf)...)

kubernetesMetricDeclarations = append(kubernetesMetricDeclarations, getEFAMetricDeclarations(conf)...)

cfg.MetricDeclarations = kubernetesMetricDeclarations
cfg.MetricDescriptors = getControlPlaneMetricDescriptors(conf)

Expand Down Expand Up @@ -615,3 +617,60 @@ func getAwsNeuronMetricDeclarations(conf *confmap.Conf) []*awsemfexporter.Metric
}
return metricDeclarations
}

// getEFAMetricDeclarations builds the EMF metric declarations for the Elastic
// Fabric Adapter (EFA) metrics, one declaration each for the container_efa_*,
// pod_efa_* and node_efa_* metric families.
//
// It returns nil unless both
// awscontainerinsight.EnhancedContainerInsightsEnabled and
// awscontainerinsight.AcceleratedComputeMetricsEnabled report true for conf.
func getEFAMetricDeclarations(conf *confmap.Conf) []*awsemfexporter.MetricDeclaration {
	if !awscontainerinsight.EnhancedContainerInsightsEnabled(conf) || !awscontainerinsight.AcceleratedComputeMetricsEnabled(conf) {
		return nil
	}

	// Container-scoped metrics: dimension sets widen from the cluster down to
	// a single EFA device within a container.
	containerEFA := &awsemfexporter.MetricDeclaration{
		Dimensions: [][]string{
			{"ClusterName"},
			{"ClusterName", "Namespace", "PodName", "ContainerName"},
			{"ClusterName", "Namespace", "PodName", "FullPodName", "ContainerName"},
			{"ClusterName", "Namespace", "PodName", "FullPodName", "ContainerName", "EfaDevice"},
		},
		MetricNameSelectors: []string{
			"container_efa_rx_bytes",
			"container_efa_tx_bytes",
			"container_efa_rx_dropped",
			"container_efa_rdma_read_bytes",
			"container_efa_rdma_write_bytes",
			"container_efa_rdma_write_recv_bytes",
		},
	}

	// Pod-scoped metrics: additionally grouped by namespace and service.
	podEFA := &awsemfexporter.MetricDeclaration{
		Dimensions: [][]string{
			{"ClusterName"},
			{"ClusterName", "Namespace"},
			{"ClusterName", "Namespace", "Service"},
			{"ClusterName", "Namespace", "PodName"},
			{"ClusterName", "Namespace", "PodName", "FullPodName"},
			{"ClusterName", "Namespace", "PodName", "FullPodName", "EfaDevice"},
		},
		MetricNameSelectors: []string{
			"pod_efa_rx_bytes",
			"pod_efa_tx_bytes",
			"pod_efa_rx_dropped",
			"pod_efa_rdma_read_bytes",
			"pod_efa_rdma_write_bytes",
			"pod_efa_rdma_write_recv_bytes",
		},
	}

	// Node-scoped metrics: grouped by node/instance identity and EFA device.
	nodeEFA := &awsemfexporter.MetricDeclaration{
		Dimensions: [][]string{
			{"ClusterName"},
			{"ClusterName", "NodeName", "InstanceId"},
			{"ClusterName", "NodeName", "InstanceId", "InstanceType", "EfaDevice"},
		},
		MetricNameSelectors: []string{
			"node_efa_rx_bytes",
			"node_efa_tx_bytes",
			"node_efa_rx_dropped",
			"node_efa_rdma_read_bytes",
			"node_efa_rdma_write_bytes",
			"node_efa_rdma_write_recv_bytes",
		},
	}

	return []*awsemfexporter.MetricDeclaration{containerEFA, podEFA, nodeEFA}
}
18 changes: 18 additions & 0 deletions translator/translate/otel/exporter/awsemf/translator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,24 @@ func TestTranslator(t *testing.T) {
"node_neurondevice_hw_ecc_events_total_sram_ecc_uncorrected",
},
},
{
Dimensions: [][]string{{"ClusterName"}, {"ClusterName", "Namespace", "PodName", "ContainerName"}, {"ClusterName", "Namespace", "PodName", "FullPodName", "ContainerName"}, {"ClusterName", "Namespace", "PodName", "FullPodName", "ContainerName", "EfaDevice"}},
MetricNameSelectors: []string{
"container_efa_rx_bytes", "container_efa_tx_bytes", "container_efa_rx_dropped", "container_efa_rdma_read_bytes", "container_efa_rdma_write_bytes", "container_efa_rdma_write_recv_bytes",
},
},
{
Dimensions: [][]string{{"ClusterName"}, {"ClusterName", "Namespace"}, {"ClusterName", "Namespace", "Service"}, {"ClusterName", "Namespace", "PodName"}, {"ClusterName", "Namespace", "PodName", "FullPodName"}, {"ClusterName", "Namespace", "PodName", "FullPodName", "EfaDevice"}},
MetricNameSelectors: []string{
"pod_efa_rx_bytes", "pod_efa_tx_bytes", "pod_efa_rx_dropped", "pod_efa_rdma_read_bytes", "pod_efa_rdma_write_bytes", "pod_efa_rdma_write_recv_bytes",
},
},
{
Dimensions: [][]string{{"ClusterName"}, {"ClusterName", "NodeName", "InstanceId"}, {"ClusterName", "NodeName", "InstanceId", "InstanceType", "EfaDevice"}},
MetricNameSelectors: []string{
"node_efa_rx_bytes", "node_efa_tx_bytes", "node_efa_rx_dropped", "node_efa_rdma_read_bytes", "node_efa_rdma_write_bytes", "node_efa_rdma_write_recv_bytes",
},
},
},
"metric_descriptors": []awsemfexporter.MetricDescriptor{
{
Expand Down

0 comments on commit 5f676d1

Please sign in to comment.