Skip to content

Commit

Permalink
[receiver/k8scluster] add attributes to node and pod entities (open-t…
Browse files Browse the repository at this point in the history
…elemetry#36862)

#### Description

Adds the following additional metadata attributes to the node and pod entities:

k8s.pod.phase - This is similar to the existing metric k8s.pod.phase.
The values can be Pending, Running, Succeeded, Failed, Unknown.
k8s.pod.status_reason - Similar to k8s.pod.status_reason metric. A brief
CamelCase message indicating details about why the pod is in this state.
Example values - Evicted, NodeLost, UnexpectedAdmissionError
k8s.node.condition_{type} - Similar to the existing metrics enabled by the
config node_conditions_to_report, e.g. k8s.node.condition_ready. Only the
[k8s default kubelet
conditions](https://pkg.go.dev/k8s.io/api/core/v1#NodeConditionType)
are added.

We'll be tracking changes to entity attributes for k8s in
[this](open-telemetry/semantic-conventions#1693)
SemConv issue.

#### Link to tracking issue
Fixes - open-telemetry#36859

#### Testing
Added unit tests and verified in cluster
  • Loading branch information
jinja2 authored Jan 6, 2025
1 parent b938631 commit d9ea85f
Show file tree
Hide file tree
Showing 6 changed files with 268 additions and 18 deletions.
31 changes: 31 additions & 0 deletions .chloggen/add_k8scluster_attr.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: k8sclusterreceiver

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add additional attributes to node and pod entities

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [35879]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
Adds the following attributes to node and pod metadata/entities:
- `k8s.pod.phase`: The phase of a Pod indicates where the Pod is in its lifecycle. E.g. 'Pending', 'Running'
- `k8s.pod.status_reason`: A brief message indicating details about why the pod is in this state. E.g. 'Evicted'
- `k8s.node.condition_*`: The condition of a node. e.g. `k8s.node.condition_ready`. The value can be `true`, `false`, `unknown`.
# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
23 changes: 21 additions & 2 deletions receiver/k8sclusterreceiver/internal/node/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ import (
)

const (
// Keys for node metadata.
nodeCreationTime = "node.creation_timestamp"
// Keys for node metadata and entity attributes. These are NOT used by resource attributes.
nodeCreationTime = "node.creation_timestamp"
k8sNodeConditionPrefix = "k8s.node.condition"
)

// Transform transforms the node to remove the fields that we don't use to reduce RAM utilization.
Expand Down Expand Up @@ -151,6 +152,24 @@ func GetMetadata(node *corev1.Node) map[experimentalmetricmetadata.ResourceID]*m
meta[conventions.AttributeK8SNodeName] = node.Name
meta[nodeCreationTime] = node.GetCreationTimestamp().Format(time.RFC3339)

// Node can have many additional conditions (gke has 18 on v1.29). Bad thresholds/implementations
// of custom conditions can cause value to oscillate between true/false frequently. So, only the default
// kubelet conditions listed below (readiness, resource pressure, network availability) are sent to avoid noise.
// https://pkg.go.dev/k8s.io/api/core/v1#NodeConditionType
kubeletConditions := map[corev1.NodeConditionType]struct{}{
corev1.NodeReady: {},
corev1.NodeMemoryPressure: {},
corev1.NodeDiskPressure: {},
corev1.NodePIDPressure: {},
corev1.NodeNetworkUnavailable: {},
}

for _, c := range node.Status.Conditions {
if _, ok := kubeletConditions[c.Type]; ok {
meta[fmt.Sprintf("%s_%s", k8sNodeConditionPrefix, strcase.ToSnake(string(c.Type)))] = strings.ToLower(string(c.Status))
}
}

nodeID := experimentalmetricmetadata.ResourceID(node.UID)
return map[experimentalmetricmetadata.ResourceID]*metadata.KubernetesMetadata{
nodeID: {
Expand Down
110 changes: 110 additions & 0 deletions receiver/k8sclusterreceiver/internal/node/nodes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ import (
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/pmetric"
"go.opentelemetry.io/collector/receiver/receivertest"
conventions "go.opentelemetry.io/collector/semconv/v1.18.0"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/experimentalmetricmetadata"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest/pmetrictest"
"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/metadata"
Expand Down Expand Up @@ -263,3 +265,111 @@ func TestTransform(t *testing.T) {
}
assert.Equal(t, wantNode, Transform(originalNode))
}

// TestNodeMetadata feeds GetMetadata a node carrying one custom condition and four
// standard conditions, and expects the resulting entity metadata to contain the node
// name, its labels, the creation timestamp and one lowercase "k8s.node.condition_*"
// entry per standard condition. The custom condition is expected to be absent.
func TestNodeMetadata(t *testing.T) {
	const nodeUID = "test-node-uid"
	createdAt := time.Now()

	// newCondition builds a condition whose heartbeat and transition times are "now".
	newCondition := func(kind corev1.NodeConditionType, status corev1.ConditionStatus, reason, message string) corev1.NodeCondition {
		return corev1.NodeCondition{
			Type:               kind,
			Status:             status,
			LastHeartbeatTime:  metav1.Time{Time: time.Now()},
			LastTransitionTime: metav1.Time{Time: time.Now()},
			Reason:             reason,
			Message:            message,
		}
	}

	conditions := []corev1.NodeCondition{
		// Custom condition: must not show up in the expected metadata below.
		newCondition("FrequentUnregisterNetDevice", "False", "NoFrequentUnregisterNetDevice", "node is functioning properly"),
		newCondition("MemoryPressure", "False", "KubeletHasSufficientMemory", "kubelet has sufficient memory available"),
		newCondition("DiskPressure", "False", "KubeletHasNoDiskPressure", "kubelet has no disk pressure"),
		newCondition("PIDPressure", "False", "KubeletHasSufficientPID", "kubelet has sufficient PID available"),
		newCondition("Ready", "True", "KubeletReady", "kubelet is posting ready status"),
	}

	systemInfo := corev1.NodeSystemInfo{
		MachineID:               "70ebe86154de42bda73a4ffe181afa3d",
		SystemUUID:              "70ebe86154de42bda73a4ffe181afa3d",
		BootID:                  "541c31d4-d1e2-4660-a3b2-484abbb1cbce",
		KernelVersion:           "6.10.4-linuxkit",
		OSImage:                 "Debian GNU/Linux 12 (bookworm)",
		ContainerRuntimeVersion: "containerd://1.7.15",
		KubeletVersion:          "v1.30.0",
		OperatingSystem:         "linux",
		Architecture:            "amd64",
	}

	testNode := &corev1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name:              "test-node",
			UID:               nodeUID,
			Labels:            map[string]string{"env": "production"},
			CreationTimestamp: metav1.Time{Time: createdAt},
		},
		Status: corev1.NodeStatus{
			Conditions: conditions,
			NodeInfo:   systemInfo,
		},
	}

	wantAttrs := map[string]string{
		"env":                                "production",
		conventions.AttributeK8SNodeName:     "test-node",
		"k8s.node.condition_ready":           "true",
		"k8s.node.condition_memory_pressure": "false",
		"k8s.node.condition_disk_pressure":   "false",
		"k8s.node.condition_pid_pressure":    "false",
		"node.creation_timestamp":            createdAt.Format(time.RFC3339),
	}
	want := map[experimentalmetricmetadata.ResourceID]*metadata.KubernetesMetadata{
		experimentalmetricmetadata.ResourceID(nodeUID): {
			EntityType:    "k8s.node",
			ResourceIDKey: "k8s.node.uid",
			ResourceID:    experimentalmetricmetadata.ResourceID(nodeUID),
			Metadata:      wantAttrs,
		},
	}

	got := GetMetadata(testNode)
	require.NotNil(t, got)
	require.Equal(t, want, got)
}
14 changes: 13 additions & 1 deletion receiver/k8sclusterreceiver/internal/pod/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ import (
)

const (
// Keys for pod metadata.
// Keys for pod metadata and entity attributes. These are NOT used by resource attributes.
podCreationTime = "pod.creation_timestamp"
podPhase = "k8s.pod.phase"
podStatusReason = "k8s.pod.status_reason"
)

// Transform transforms the pod to remove the fields that we don't use to reduce RAM utilization.
Expand All @@ -43,6 +45,7 @@ func Transform(pod *corev1.Pod) *corev1.Pod {
Status: corev1.PodStatus{
Phase: pod.Status.Phase,
QOSClass: pod.Status.QOSClass,
Reason: pod.Status.Reason,
},
}
for _, cs := range pod.Status.ContainerStatuses {
Expand Down Expand Up @@ -126,6 +129,15 @@ func GetMetadata(pod *corev1.Pod, mc *metadata.Store, logger *zap.Logger) map[ex
meta := maps.MergeStringMaps(map[string]string{}, pod.Labels)

meta[podCreationTime] = pod.CreationTimestamp.Format(time.RFC3339)
phase := pod.Status.Phase
if phase == "" {
phase = corev1.PodUnknown
}
meta[podPhase] = string(phase)
reason := pod.Status.Reason
if reason != "" {
meta[podStatusReason] = reason
}

for _, or := range pod.OwnerReferences {
kind := strings.ToLower(or.Kind)
Expand Down
76 changes: 73 additions & 3 deletions receiver/k8sclusterreceiver/internal/pod/pods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,9 @@ func expectedKubernetesMetadata(to testCaseOptions) map[experimentalmetricmetada
ResourceIDKey: "k8s.pod.uid",
ResourceID: experimentalmetricmetadata.ResourceID(podUIDLabel),
Metadata: map[string]string{
kindNameLabel: kindObjName,
kindUIDLabel: kindObjUID,
kindNameLabel: kindObjName,
kindUIDLabel: kindObjUID,
"k8s.pod.phase": "Unknown", // Default value when phase is not set.
},
},
}
Expand Down Expand Up @@ -415,6 +416,7 @@ func TestTransform(t *testing.T) {
},
Status: corev1.PodStatus{
Phase: corev1.PodRunning,
Reason: "Evicted",
HostIP: "192.168.1.100",
PodIP: "10.244.0.5",
StartTime: &v1.Time{Time: v1.Now().Add(-5 * time.Minute)},
Expand Down Expand Up @@ -463,7 +465,8 @@ func TestTransform(t *testing.T) {
},
},
Status: corev1.PodStatus{
Phase: corev1.PodRunning,
Phase: corev1.PodRunning,
Reason: "Evicted",
ContainerStatuses: []corev1.ContainerStatus{
{
Name: "my-container",
Expand All @@ -478,3 +481,70 @@ func TestTransform(t *testing.T) {
}
assert.Equal(t, wantPod, Transform(originalPod))
}

// TestPodMetadata checks the pod entity metadata returned by GetMetadata for a pod
// owned by a ReplicaSet: "k8s.pod.phase" is always expected, while
// "k8s.pod.status_reason" is expected only when the pod status carries a reason.
func TestPodMetadata(t *testing.T) {
	// Ownership attributes expected in every case.
	ownerAttrs := map[string]string{
		"k8s.workload.kind":   "Deployment",
		"k8s.workload.name":   "test-deployment-0",
		"k8s.replicaset.name": "test-replicaset-0",
		"k8s.replicaset.uid":  "test-replicaset-0-uid",
		"k8s.deployment.name": "test-deployment-0",
		"k8s.deployment.uid":  "test-deployment-0-uid",
	}

	cases := []struct {
		name        string
		phase       corev1.PodPhase
		reason      string
		statusAttrs map[string]string
	}{
		{
			name:   "Pod with status reason",
			phase:  corev1.PodFailed,
			reason: "Evicted",
			statusAttrs: map[string]string{
				"k8s.pod.phase":         "Failed",
				"k8s.pod.status_reason": "Evicted",
			},
		},
		{
			name:   "Pod without status reason",
			phase:  corev1.PodRunning,
			reason: "",
			statusAttrs: map[string]string{
				"k8s.pod.phase": "Running",
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			testPod := podWithOwnerReference("ReplicaSet")
			testPod.Status.Phase = tc.phase
			testPod.Status.Reason = tc.reason

			store := mockMetadataStore(testCaseOptions{
				kind:         "ReplicaSet",
				withParentOR: true,
			})
			meta := GetMetadata(testPod, store, zap.NewNop())

			require.NotNil(t, meta)
			require.Contains(t, meta, experimentalmetricmetadata.ResourceID("test-pod-0-uid"))
			got := meta["test-pod-0-uid"].Metadata

			// Layer the expectations: common pod metadata first, then the
			// ownership and status attributes, which take precedence on key clashes.
			want := make(map[string]string)
			for _, layer := range []map[string]string{commonPodMetadata, ownerAttrs, tc.statusAttrs} {
				for k, v := range layer {
					want[k] = v
				}
			}
			assert.Equal(t, want, got)
		})
	}
}
Loading

0 comments on commit d9ea85f

Please sign in to comment.