From ca00a4d960861623f3d47ada16310d961af1c2d2 Mon Sep 17 00:00:00 2001 From: Chad Patel Date: Tue, 10 Oct 2023 14:40:48 -0500 Subject: [PATCH] =?UTF-8?q?move=20container=5Fstatus=5F=20metrics=20to=20p?= =?UTF-8?q?od=5Fcontainer=5F=20so=20we=20can=20pick=20up=20sh=E2=80=A6=20(?= =?UTF-8?q?#109)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * move container_status_ metrics to pod_container_ so we can pick up short lived container states * update README * update README * minor refactor so units work --- internal/aws/containerinsight/const.go | 107 ++++++++--------- .../awscontainerinsightreceiver/README.md | 110 +++++++++--------- .../internal/stores/podstore.go | 74 ++++++++---- .../internal/stores/podstore_test.go | 110 ++++++++++++------ 4 files changed, 233 insertions(+), 168 deletions(-) diff --git a/internal/aws/containerinsight/const.go b/internal/aws/containerinsight/const.go index 62228df8ecd4..d6ff6ecdbc9c 100644 --- a/internal/aws/containerinsight/const.go +++ b/internal/aws/containerinsight/const.go @@ -76,38 +76,39 @@ const ( FSInodesfree = "filesystem_inodes_free" FSUtilization = "filesystem_utilization" - StatusConditionReady = "status_condition_ready" - StatusConditionDiskPressure = "status_condition_disk_pressure" - StatusConditionMemoryPressure = "status_condition_memory_pressure" - StatusConditionPIDPressure = "status_condition_pid_pressure" - StatusConditionNetworkUnavailable = "status_condition_network_unavailable" - StatusConditionUnknown = "status_condition_unknown" - StatusCapacityPods = "status_capacity_pods" - StatusAllocatablePods = "status_allocatable_pods" - StatusNumberAvailable = "status_number_available" - StatusNumberUnavailable = "status_number_unavailable" - StatusDesiredNumberScheduled = "status_desired_number_scheduled" - StatusCurrentNumberScheduled = "status_current_number_scheduled" - StatusReplicasAvailable = "status_replicas_available" - StatusReplicasUnavailable = 
"status_replicas_unavailable" - SpecReplicas = "spec_replicas" - StatusRunning = "status_running" - StatusTerminated = "status_terminated" - StatusWaiting = "status_waiting" - StatusWaitingReasonCrashLoopBackOff = "status_waiting_reason_crash_loop_back_off" - StatusWaitingReasonImagePullError = "status_waiting_reason_image_pull_error" - StatusWaitingReasonStartError = "status_waiting_reason_start_error" - StatusWaitingReasonCreateContainerError = "status_waiting_reason_create_container_error" - StatusWaitingReasonCreateContainerConfigError = "status_waiting_reason_create_container_config_error" - StatusTerminatedReasonOOMKilled = "status_terminated_reason_oom_killed" - StatusPending = "status_pending" - StatusSucceeded = "status_succeeded" - StatusFailed = "status_failed" - StatusUnknown = "status_unknown" - StatusReady = "status_ready" - StatusScheduled = "status_scheduled" - ReplicasDesired = "replicas_desired" - ReplicasReady = "replicas_ready" + StatusConditionReady = "status_condition_ready" + StatusConditionDiskPressure = "status_condition_disk_pressure" + StatusConditionMemoryPressure = "status_condition_memory_pressure" + StatusConditionPIDPressure = "status_condition_pid_pressure" + StatusConditionNetworkUnavailable = "status_condition_network_unavailable" + StatusConditionUnknown = "status_condition_unknown" + StatusCapacityPods = "status_capacity_pods" + StatusAllocatablePods = "status_allocatable_pods" + StatusNumberAvailable = "status_number_available" + StatusNumberUnavailable = "status_number_unavailable" + StatusDesiredNumberScheduled = "status_desired_number_scheduled" + StatusCurrentNumberScheduled = "status_current_number_scheduled" + StatusReplicasAvailable = "status_replicas_available" + StatusReplicasUnavailable = "status_replicas_unavailable" + SpecReplicas = "spec_replicas" + StatusContainerRunning = "container_status_running" + StatusContainerTerminated = "container_status_terminated" + StatusContainerWaiting = "container_status_waiting" + 
StatusContainerWaitingReasonCrashLoopBackOff = "container_status_waiting_reason_crash_loop_back_off" + StatusContainerWaitingReasonImagePullError = "container_status_waiting_reason_image_pull_error" + StatusContainerWaitingReasonStartError = "container_status_waiting_reason_start_error" + StatusContainerWaitingReasonCreateContainerError = "container_status_waiting_reason_create_container_error" + StatusContainerWaitingReasonCreateContainerConfigError = "container_status_waiting_reason_create_container_config_error" + StatusContainerTerminatedReasonOOMKilled = "container_status_terminated_reason_oom_killed" + StatusRunning = "status_running" + StatusPending = "status_pending" + StatusSucceeded = "status_succeeded" + StatusFailed = "status_failed" + StatusUnknown = "status_unknown" + StatusReady = "status_ready" + StatusScheduled = "status_scheduled" + ReplicasDesired = "replicas_desired" + ReplicasReady = "replicas_ready" RunningPodCount = "number_of_running_pods" RunningContainerCount = "number_of_running_containers" @@ -163,13 +164,13 @@ const ( ) var WaitingReasonLookup = map[string]string{ - "CrashLoopBackOff": StatusWaitingReasonCrashLoopBackOff, - "ErrImagePull": StatusWaitingReasonImagePullError, - "ImagePullBackOff": StatusWaitingReasonImagePullError, - "InvalidImageName": StatusWaitingReasonImagePullError, - "CreateContainerError": StatusWaitingReasonCreateContainerError, - "CreateContainerConfigError": StatusWaitingReasonCreateContainerConfigError, - "StartError": StatusWaitingReasonStartError, + "CrashLoopBackOff": StatusContainerWaitingReasonCrashLoopBackOff, + "ErrImagePull": StatusContainerWaitingReasonImagePullError, + "ImagePullBackOff": StatusContainerWaitingReasonImagePullError, + "InvalidImageName": StatusContainerWaitingReasonImagePullError, + "CreateContainerError": StatusContainerWaitingReasonCreateContainerError, + "CreateContainerConfigError": StatusContainerWaitingReasonCreateContainerConfigError, + "StartError": 
StatusContainerWaitingReasonStartError, } var metricToUnitMap map[string]string @@ -261,20 +262,23 @@ func init() { ReplicasReady: UnitCount, // kube-state-metrics equivalents - StatusRunning: UnitCount, - StatusTerminated: UnitCount, - StatusWaiting: UnitCount, - StatusWaitingReasonCrashLoopBackOff: UnitCount, - StatusWaitingReasonImagePullError: UnitCount, - StatusWaitingReasonStartError: UnitCount, - StatusWaitingReasonCreateContainerConfigError: UnitCount, - StatusWaitingReasonCreateContainerError: UnitCount, - StatusFailed: UnitCount, - StatusPending: UnitCount, - StatusSucceeded: UnitCount, - StatusUnknown: UnitCount, - StatusReady: UnitCount, - StatusScheduled: UnitCount, + StatusContainerRunning: UnitCount, + StatusContainerTerminated: UnitCount, + StatusContainerWaiting: UnitCount, + StatusContainerWaitingReasonCrashLoopBackOff: UnitCount, + StatusContainerWaitingReasonImagePullError: UnitCount, + StatusContainerWaitingReasonStartError: UnitCount, + StatusContainerWaitingReasonCreateContainerConfigError: UnitCount, + StatusContainerWaitingReasonCreateContainerError: UnitCount, + StatusContainerTerminatedReasonOOMKilled: UnitCount, + // status_running is a separate key from container_status_running and needs its own unit entry + StatusRunning: UnitCount, + StatusFailed: UnitCount, + StatusPending: UnitCount, + StatusSucceeded: UnitCount, + StatusUnknown: UnitCount, + StatusReady: UnitCount, + StatusScheduled: UnitCount, // cluster metrics NodeCount: UnitCount, diff --git a/receiver/awscontainerinsightreceiver/README.md b/receiver/awscontainerinsightreceiver/README.md index 81502bb1954a..c270856d50ae 100644 --- a/receiver/awscontainerinsightreceiver/README.md +++ b/receiver/awscontainerinsightreceiver/README.md @@ -687,52 +687,61 @@ kubectl apply -f config.yaml

### Pod -| Metric | Unit | -|---------------------------------------|--------------| -| pod_cpu_limit | Millicore | -| pod_cpu_request | Millicore | -| pod_cpu_reserved_capacity | Percent | -| pod_cpu_usage_system | Millicore | -| pod_cpu_usage_total | Millicore | -| pod_cpu_usage_user | Millicore | -| pod_cpu_utilization | Percent | -| pod_cpu_utilization_over_pod_limit | Percent | -| pod_memory_cache | Bytes | -| pod_memory_failcnt | Count | -| pod_memory_hierarchical_pgfault | Count/Second | -| pod_memory_hierarchical_pgmajfault | Count/Second | -| pod_memory_limit | Bytes | -| pod_memory_mapped_file | Bytes | -| pod_memory_max_usage | Bytes | -| pod_memory_pgfault | Count/Second | -| pod_memory_pgmajfault | Count/Second | -| pod_memory_request | Bytes | -| pod_memory_reserved_capacity | Percent | -| pod_memory_rss | Bytes | -| pod_memory_swap | Bytes | -| pod_memory_usage | Bytes | -| pod_memory_utilization | Percent | -| pod_memory_utilization_over_pod_limit | Percent | -| pod_memory_working_set | Bytes | -| pod_network_rx_bytes | Bytes/Second | -| pod_network_rx_dropped | Count/Second | -| pod_network_rx_errors | Count/Second | -| pod_network_rx_packets | Count/Second | -| pod_network_total_bytes | Bytes/Second | -| pod_network_tx_bytes | Bytes/Second | -| pod_network_tx_dropped | Count/Second | -| pod_network_tx_errors | Count/Second | -| pod_network_tx_packets | Count/Second | -| pod_number_of_container_restarts | Count | -| pod_number_of_containers | Count | -| pod_number_of_running_containers | Count | -| pod_status_ready | Count | -| pod_status_scheduled | Count | -| pod_status_unknown | Count | -| pod_status_failed | Count | -| pod_status_pending | Count | -| pod_status_running | Count | -| pod_status_succeeded | Count | +| Metric | Unit | +|-------------------------------------------------------------------|--------------| +| pod_cpu_limit | Millicore | +| pod_cpu_request | Millicore | +| pod_cpu_reserved_capacity | Percent | +| pod_cpu_usage_system | 
Millicore | +| pod_cpu_usage_total | Millicore | +| pod_cpu_usage_user | Millicore | +| pod_cpu_utilization | Percent | +| pod_cpu_utilization_over_pod_limit | Percent | +| pod_memory_cache | Bytes | +| pod_memory_failcnt | Count | +| pod_memory_hierarchical_pgfault | Count/Second | +| pod_memory_hierarchical_pgmajfault | Count/Second | +| pod_memory_limit | Bytes | +| pod_memory_mapped_file | Bytes | +| pod_memory_max_usage | Bytes | +| pod_memory_pgfault | Count/Second | +| pod_memory_pgmajfault | Count/Second | +| pod_memory_request | Bytes | +| pod_memory_reserved_capacity | Percent | +| pod_memory_rss | Bytes | +| pod_memory_swap | Bytes | +| pod_memory_usage | Bytes | +| pod_memory_utilization | Percent | +| pod_memory_utilization_over_pod_limit | Percent | +| pod_memory_working_set | Bytes | +| pod_network_rx_bytes | Bytes/Second | +| pod_network_rx_dropped | Count/Second | +| pod_network_rx_errors | Count/Second | +| pod_network_rx_packets | Count/Second | +| pod_network_total_bytes | Bytes/Second | +| pod_network_tx_bytes | Bytes/Second | +| pod_network_tx_dropped | Count/Second | +| pod_network_tx_errors | Count/Second | +| pod_network_tx_packets | Count/Second | +| pod_number_of_container_restarts | Count | +| pod_number_of_containers | Count | +| pod_number_of_running_containers | Count | +| pod_status_ready | Count | +| pod_status_scheduled | Count | +| pod_status_unknown | Count | +| pod_status_failed | Count | +| pod_status_pending | Count | +| pod_status_running | Count | +| pod_status_succeeded | Count | +| pod_container_status_running | Count | +| pod_container_status_terminated | Count | +| pod_container_status_waiting | Count | +| pod_container_status_waiting_reason_crash_loop_back_off | Count | +| pod_container_status_waiting_reason_image_pull_error | Count | +| pod_container_status_waiting_reason_start_error | Count | +| pod_container_status_waiting_reason_create_container_error | Count | +| 
pod_container_status_waiting_reason_create_container_config_error | Count | +| pod_container_status_terminated_reason_oom_killed | Count | | Resource Attribute | |----------------------| @@ -816,15 +825,6 @@ kubectl apply -f config.yaml | container_memory_utilization_over_container_limit | Percent | | container_memory_working_set | Bytes | | number_of_container_restarts | Count | -| container_status_running | Count | -| container_status_terminated | Count | -| container_status_waiting | Count | -| container_status_waiting_reason_crash_loop_back_off | Count | -| container_status_waiting_reason_image_pull_error | Count | -| container_status_waiting_reason_start_error | Count | -| container_status_waiting_reason_create_container_error | Count | -| container_status_waiting_reason_create_container_config_error | Count | -| container_status_terminated_reason_oom_killed | Count |

diff --git a/receiver/awscontainerinsightreceiver/internal/stores/podstore.go b/receiver/awscontainerinsightreceiver/internal/stores/podstore.go index 3babc6a37b45..a14817a8c2aa 100644 --- a/receiver/awscontainerinsightreceiver/internal/stores/podstore.go +++ b/receiver/awscontainerinsightreceiver/internal/stores/podstore.go @@ -497,12 +497,14 @@ func (p *PodStore) addStatus(metric CIMetric, pod *corev1.Pod) { if p.includeEnhancedMetrics { p.addPodStatusMetrics(metric, pod) p.addPodConditionMetrics(metric, pod) + p.addPodContainerStatusMetrics(metric, pod) } var curContainerRestarts int for _, containerStatus := range pod.Status.ContainerStatuses { curContainerRestarts += int(containerStatus.RestartCount) } + podKey := createPodKeyFromMetric(metric) if podKey != "" { content, ok := p.getPrevMeasurement(ci.TypePod, podKey) @@ -520,28 +522,16 @@ func (p *PodStore) addStatus(metric CIMetric, pod *corev1.Pod) { if containerName := metric.GetTag(ci.ContainerNamekey); containerName != "" { for _, containerStatus := range pod.Status.ContainerStatuses { if containerStatus.Name == containerName { - possibleStatuses := map[string]int{ - ci.StatusRunning: 0, - ci.StatusWaiting: 0, - ci.StatusTerminated: 0, - } switch { case containerStatus.State.Running != nil: metric.AddTag(ci.ContainerStatus, "Running") - possibleStatuses[ci.StatusRunning] = 1 case containerStatus.State.Waiting != nil: metric.AddTag(ci.ContainerStatus, "Waiting") - possibleStatuses[ci.StatusWaiting] = 1 - reason := containerStatus.State.Waiting.Reason - if reason != "" { - metric.AddTag(ci.ContainerStatusReason, reason) - if val, ok := ci.WaitingReasonLookup[reason]; ok { - possibleStatuses[val] = 1 - } + if containerStatus.State.Waiting.Reason != "" { + metric.AddTag(ci.ContainerStatusReason, containerStatus.State.Waiting.Reason) } case containerStatus.State.Terminated != nil: metric.AddTag(ci.ContainerStatus, "Terminated") - possibleStatuses[ci.StatusTerminated] = 1 if 
containerStatus.State.Terminated.Reason != "" { metric.AddTag(ci.ContainerStatusReason, containerStatus.State.Terminated.Reason) } @@ -549,9 +539,6 @@ func (p *PodStore) addStatus(metric CIMetric, pod *corev1.Pod) { if containerStatus.LastTerminationState.Terminated != nil && containerStatus.LastTerminationState.Terminated.Reason != "" { metric.AddTag(ci.ContainerLastTerminationReason, containerStatus.LastTerminationState.Terminated.Reason) - if strings.Contains(containerStatus.LastTerminationState.Terminated.Reason, "OOMKilled") { - possibleStatuses[ci.StatusTerminatedReasonOOMKilled] = 1 - } } containerKey := createContainerKeyFromMetric(metric) if containerKey != "" { @@ -566,13 +553,6 @@ func (p *PodStore) addStatus(metric CIMetric, pod *corev1.Pod) { } p.setPrevMeasurement(ci.TypeContainer, containerKey, prevContainerMeasurement{restarts: int(containerStatus.RestartCount)}) } - - // add container containerStatus metrics - if p.includeEnhancedMetrics { - for name, val := range possibleStatuses { - metric.AddField(ci.MetricName(ci.TypeContainer, name), val) - } - } } } } @@ -613,6 +593,45 @@ func (p *PodStore) addPodConditionMetrics(metric CIMetric, pod *corev1.Pod) { } } +// addPodContainerStatusMetrics adds pod-level fields that count how many of the +// pod's containers are running, waiting or terminated, plus per-reason counts. +// The three state counts are always written (0 when no container matches); the +// reason counts are sparse and only written when at least one container matches. +// It only adds fields: container state reasons are not written as pod tags. +func (p *PodStore) addPodContainerStatusMetrics(metric CIMetric, pod *corev1.Pod) { + possibleStatuses := map[string]int{ + ci.StatusContainerRunning: 0, + ci.StatusContainerWaiting: 0, + ci.StatusContainerTerminated: 0, + } + for _, containerStatus := range pod.Status.ContainerStatuses { + switch { + case containerStatus.State.Running != nil: + possibleStatuses[ci.StatusContainerRunning]++ + case containerStatus.State.Waiting != nil: + possibleStatuses[ci.StatusContainerWaiting]++ + // Reasons missing from WaitingReasonLookup get no counter of their own. + // A missing map key reads as 0, so incrementing starts a new counter at 1. + if val, ok := ci.WaitingReasonLookup[containerStatus.State.Waiting.Reason]; ok { + possibleStatuses[val]++ + } + case containerStatus.State.Terminated != nil: + 
possibleStatuses[ci.StatusContainerTerminated]++ + } + + // The OOMKilled count is taken from LastTerminationState, not from State. + lastTerminated := containerStatus.LastTerminationState.Terminated + if lastTerminated != nil && strings.Contains(lastTerminated.Reason, "OOMKilled") { + possibleStatuses[ci.StatusContainerTerminatedReasonOOMKilled]++ + } + } + + for name, val := range possibleStatuses { + // desired prefix: pod_container_ + metric.AddField(ci.MetricName(ci.TypePod, name), val) + } +} + // It could be used to get limit/request(depend on the passed-in fn) per pod // return the sum of ResourceSetting and a bool which indicate whether all container set Resource func getResourceSettingForPod(pod *corev1.Pod, bound uint64, resource corev1.ResourceName, fn func(resource corev1.ResourceName, spec corev1.Container) (uint64, bool)) (uint64, bool) { diff --git a/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go b/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go index 6a5c98b32455..367ee59f8dbf 100644 --- a/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go +++ b/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go @@ -629,9 +629,13 @@ func TestPodStore_addStatus_adds_all_pod_conditions_as_metrics_when_unexpected(t assert.Equal(t, 1, decoratedResultMetric.GetField(PodScheduledMetricName)) assert.Equal(t, 0, decoratedResultMetric.GetField(PodUnknownMetricName)) } - func TestPodStore_addStatus_enhanced_metrics(t *testing.T) { pod := getBaseTestPodInfo() + // add another container + containerCopy := pod.Status.ContainerStatuses[0] + containerCopy.Name = "ubuntu2" + 
pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, containerCopy) + tags := map[string]string{ci.MetricType: ci.TypePod, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit"} fields := map[string]interface{}{ci.MetricName(ci.TypePod, ci.CPUTotal): float64(1)} podStore := getPodStore() @@ -644,21 +648,40 @@ func TestPodStore_addStatus_enhanced_metrics(t *testing.T) { val := metric.GetField(ci.MetricName(ci.TypePod, ci.ContainerRestartCount)) assert.Nil(t, val) + // set up container defaults tags = map[string]string{ci.MetricType: ci.TypeContainer, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit", ci.ContainerNamekey: "ubuntu"} metric = generateMetric(fields, tags) - podStore.addStatus(metric, pod) assert.Equal(t, "Running", metric.GetTag(ci.ContainerStatus)) val = metric.GetField(ci.ContainerRestartCount) assert.Nil(t, val) - val = metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusRunning)) + // set up the other container + tags = map[string]string{ci.MetricType: ci.TypeContainer, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit", ci.ContainerNamekey: "ubuntu2"} + metric = generateMetric(fields, tags) + podStore.addStatus(metric, pod) + assert.Equal(t, "Running", metric.GetTag(ci.ContainerStatus)) + val = metric.GetField(ci.ContainerRestartCount) + assert.Nil(t, val) + + tags = map[string]string{ci.MetricType: ci.TypePod, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit"} + metric = generateMetric(fields, tags) + + podStore.addStatus(metric, pod) + assert.Equal(t, "Running", metric.GetTag(ci.PodStatus)) + val = metric.GetField(ci.ContainerRestartCount) + assert.Nil(t, val) + val = metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerRunning)) assert.NotNil(t, val) - assert.Equal(t, 1, val) + assert.Equal(t, 2, val) pod.Status.ContainerStatuses[0].State.Running = nil pod.Status.ContainerStatuses[0].State.Terminated = &corev1.ContainerStateTerminated{} 
pod.Status.ContainerStatuses[0].LastTerminationState.Terminated = &corev1.ContainerStateTerminated{Reason: "OOMKilled"} pod.Status.ContainerStatuses[0].RestartCount = 1 + pod.Status.ContainerStatuses[1].State.Running = nil + pod.Status.ContainerStatuses[1].State.Terminated = &corev1.ContainerStateTerminated{} + pod.Status.ContainerStatuses[1].LastTerminationState.Terminated = &corev1.ContainerStateTerminated{Reason: "OOMKilled"} + pod.Status.ContainerStatuses[1].RestartCount = 1 pod.Status.Phase = "Succeeded" tags = map[string]string{ci.MetricType: ci.TypePod, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit"} @@ -666,69 +689,84 @@ func TestPodStore_addStatus_enhanced_metrics(t *testing.T) { podStore.addStatus(metric, pod) assert.Equal(t, "Succeeded", metric.GetTag(ci.PodStatus)) - assert.Equal(t, int(1), metric.GetField(ci.MetricName(ci.TypePod, ci.ContainerRestartCount)).(int)) + assert.Equal(t, 2, metric.GetField(ci.MetricName(ci.TypePod, ci.ContainerRestartCount))) + // update the container metrics + // set up container defaults tags = map[string]string{ci.MetricType: ci.TypeContainer, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit", ci.ContainerNamekey: "ubuntu"} metric = generateMetric(fields, tags) + podStore.addStatus(metric, pod) + assert.Equal(t, 1, metric.GetField(ci.ContainerRestartCount)) + + // test the other container + tags = map[string]string{ci.MetricType: ci.TypeContainer, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit", ci.ContainerNamekey: "ubuntu2"} + metric = generateMetric(fields, tags) + podStore.addStatus(metric, pod) + assert.Equal(t, 1, metric.GetField(ci.ContainerRestartCount)) + + tags = map[string]string{ci.MetricType: ci.TypePod, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit"} + metric = generateMetric(fields, tags) podStore.addStatus(metric, pod) - assert.Equal(t, "Terminated", metric.GetTag(ci.ContainerStatus)) - assert.Equal(t, "OOMKilled", metric.GetTag(ci.ContainerLastTerminationReason)) - 
assert.Equal(t, int(1), metric.GetField(ci.ContainerRestartCount).(int)) - assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusTerminated))) - assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusTerminatedReasonOOMKilled))) + assert.Equal(t, 2, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerTerminated))) + assert.Equal(t, 2, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerTerminatedReasonOOMKilled))) pod.Status.ContainerStatuses[0].LastTerminationState.Terminated = nil pod.Status.ContainerStatuses[0].State.Waiting = &corev1.ContainerStateWaiting{Reason: "CrashLoopBackOff"} + pod.Status.ContainerStatuses[1].LastTerminationState.Terminated = nil + pod.Status.ContainerStatuses[1].State.Waiting = &corev1.ContainerStateWaiting{Reason: "CrashLoopBackOff"} - tags = map[string]string{ci.MetricType: ci.TypeContainer, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit", ci.ContainerNamekey: "ubuntu"} + tags = map[string]string{ci.MetricType: ci.TypePod, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit"} metric = generateMetric(fields, tags) podStore.addStatus(metric, pod) - assert.Equal(t, "Waiting", metric.GetTag(ci.ContainerStatus)) - assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaiting))) - assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonCrashLoopBackOff))) + //assert.Equal(t, "Waiting", metric.GetTag(ci.ContainerStatus)) + assert.Equal(t, 2, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaiting))) + assert.Equal(t, 2, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaitingReasonCrashLoopBackOff))) // sparse metrics - assert.Nil(t, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonImagePullError))) - assert.Nil(t, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusTerminatedReasonOOMKilled))) - assert.Nil(t, metric.GetField(ci.MetricName(ci.TypeContainer, 
ci.StatusWaitingReasonStartError))) - assert.Nil(t, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonCreateContainerError))) - assert.Nil(t, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonCreateContainerConfigError))) + assert.Nil(t, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaitingReasonImagePullError))) + assert.Nil(t, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerTerminatedReasonOOMKilled))) + assert.Nil(t, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaitingReasonStartError))) + assert.Nil(t, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaitingReasonCreateContainerError))) + assert.Nil(t, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaitingReasonCreateContainerConfigError))) pod.Status.ContainerStatuses[0].State.Waiting = &corev1.ContainerStateWaiting{Reason: "ImagePullBackOff"} + pod.Status.ContainerStatuses[1].State.Waiting = &corev1.ContainerStateWaiting{Reason: "StartError"} - tags = map[string]string{ci.MetricType: ci.TypeContainer, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit", ci.ContainerNamekey: "ubuntu"} + tags = map[string]string{ci.MetricType: ci.TypePod, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit"} metric = generateMetric(fields, tags) podStore.addStatus(metric, pod) - assert.Equal(t, "Waiting", metric.GetTag(ci.ContainerStatus)) - assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaiting))) - assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonImagePullError))) + assert.Equal(t, "Succeeded", metric.GetTag(ci.PodStatus)) + assert.Equal(t, 2, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaiting))) + assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaitingReasonImagePullError))) + assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaitingReasonStartError))) 
pod.Status.ContainerStatuses[0].State.Waiting = &corev1.ContainerStateWaiting{Reason: "ErrImagePull"} metric = generateMetric(fields, tags) podStore.addStatus(metric, pod) - assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonImagePullError))) + assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaitingReasonImagePullError))) pod.Status.ContainerStatuses[0].State.Waiting = &corev1.ContainerStateWaiting{Reason: "InvalidImageName"} metric = generateMetric(fields, tags) podStore.addStatus(metric, pod) - assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonImagePullError))) + assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaitingReasonImagePullError))) pod.Status.ContainerStatuses[0].State.Waiting = &corev1.ContainerStateWaiting{Reason: "CreateContainerError"} metric = generateMetric(fields, tags) podStore.addStatus(metric, pod) - assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonCreateContainerError))) + assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaitingReasonCreateContainerError))) pod.Status.ContainerStatuses[0].State.Waiting = &corev1.ContainerStateWaiting{Reason: "CreateContainerConfigError"} metric = generateMetric(fields, tags) podStore.addStatus(metric, pod) - assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonCreateContainerConfigError))) + assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaitingReasonCreateContainerConfigError))) pod.Status.ContainerStatuses[0].State.Waiting = &corev1.ContainerStateWaiting{Reason: "StartError"} + pod.Status.ContainerStatuses[1].State.Waiting = nil metric = generateMetric(fields, tags) podStore.addStatus(metric, pod) - assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonStartError))) + assert.Equal(t, 1, 
metric.GetField(ci.MetricName(ci.TypePod, ci.StatusContainerWaitingReasonStartError))) // test delta of restartCount pod.Status.ContainerStatuses[0].RestartCount = 3 @@ -736,13 +774,13 @@ func TestPodStore_addStatus_enhanced_metrics(t *testing.T) { metric = generateMetric(fields, tags) podStore.addStatus(metric, pod) - assert.Equal(t, int(2), metric.GetField(ci.MetricName(ci.TypePod, ci.ContainerRestartCount)).(int)) + assert.Equal(t, 2, metric.GetField(ci.MetricName(ci.TypePod, ci.ContainerRestartCount))) tags = map[string]string{ci.MetricType: ci.TypeContainer, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit", ci.ContainerNamekey: "ubuntu"} metric = generateMetric(fields, tags) podStore.addStatus(metric, pod) - assert.Equal(t, int(2), metric.GetField(ci.ContainerRestartCount).(int)) + assert.Equal(t, 2, metric.GetField(ci.ContainerRestartCount)) } func TestPodStore_addStatus_without_enhanced_metrics(t *testing.T) { @@ -765,7 +803,7 @@ func TestPodStore_addStatus_without_enhanced_metrics(t *testing.T) { assert.Equal(t, "Running", metric.GetTag(ci.ContainerStatus)) val = metric.GetField(ci.ContainerRestartCount) assert.Nil(t, val) - assert.False(t, metric.HasField(ci.MetricName(ci.TypeContainer, ci.StatusRunning))) + assert.False(t, metric.HasField(ci.MetricName(ci.TypeContainer, ci.StatusContainerRunning))) pod.Status.ContainerStatuses[0].State.Running = nil pod.Status.ContainerStatuses[0].State.Terminated = &corev1.ContainerStateTerminated{} @@ -787,7 +825,7 @@ func TestPodStore_addStatus_without_enhanced_metrics(t *testing.T) { assert.Equal(t, "Terminated", metric.GetTag(ci.ContainerStatus)) assert.Equal(t, "OOMKilled", metric.GetTag(ci.ContainerLastTerminationReason)) assert.Equal(t, int(1), metric.GetField(ci.ContainerRestartCount).(int)) - assert.False(t, metric.HasField(ci.MetricName(ci.TypeContainer, ci.StatusTerminated))) + assert.False(t, metric.HasField(ci.MetricName(ci.TypeContainer, ci.StatusContainerTerminated))) 
pod.Status.ContainerStatuses[0].State.Terminated = nil pod.Status.ContainerStatuses[0].State.Waiting = &corev1.ContainerStateWaiting{Reason: "CrashLoopBackOff"} @@ -797,8 +835,8 @@ func TestPodStore_addStatus_without_enhanced_metrics(t *testing.T) { podStore.addStatus(metric, pod) assert.Equal(t, "Waiting", metric.GetTag(ci.ContainerStatus)) - assert.False(t, metric.HasField(ci.MetricName(ci.TypeContainer, ci.StatusWaiting))) - assert.False(t, metric.HasField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonCrashLoopBackOff))) + assert.False(t, metric.HasField(ci.MetricName(ci.TypeContainer, ci.StatusContainerWaiting))) + assert.False(t, metric.HasField(ci.MetricName(ci.TypeContainer, ci.StatusContainerWaitingReasonCrashLoopBackOff))) pod.Status.ContainerStatuses[0].State.Waiting = &corev1.ContainerStateWaiting{Reason: "SomeOtherReason"} @@ -807,8 +845,8 @@ func TestPodStore_addStatus_without_enhanced_metrics(t *testing.T) { podStore.addStatus(metric, pod) assert.Equal(t, "Waiting", metric.GetTag(ci.ContainerStatus)) - assert.False(t, metric.HasField(ci.MetricName(ci.TypeContainer, ci.StatusWaiting))) - assert.False(t, metric.HasField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonCrashLoopBackOff))) + assert.False(t, metric.HasField(ci.MetricName(ci.TypeContainer, ci.StatusContainerWaiting))) + assert.False(t, metric.HasField(ci.MetricName(ci.TypeContainer, ci.StatusContainerWaitingReasonCrashLoopBackOff))) // test delta of restartCount pod.Status.ContainerStatuses[0].RestartCount = 3