Skip to content

Commit

Permalink
move container_status_ metrics to pod_container_ so we can pick up short lived container states
Browse files Browse the repository at this point in the history
  • Loading branch information
chadpatel committed Oct 9, 2023
1 parent 4b74f35 commit 6abf868
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 106 deletions.
104 changes: 52 additions & 52 deletions receiver/awscontainerinsightreceiver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -687,52 +687,58 @@ kubectl apply -f config.yaml
<br/><br/>

### Pod
| Metric | Unit |
|---------------------------------------|--------------|
| pod_cpu_limit | Millicore |
| pod_cpu_request | Millicore |
| pod_cpu_reserved_capacity | Percent |
| pod_cpu_usage_system | Millicore |
| pod_cpu_usage_total | Millicore |
| pod_cpu_usage_user | Millicore |
| pod_cpu_utilization | Percent |
| pod_cpu_utilization_over_pod_limit | Percent |
| pod_memory_cache | Bytes |
| pod_memory_failcnt | Count |
| pod_memory_hierarchical_pgfault | Count/Second |
| pod_memory_hierarchical_pgmajfault | Count/Second |
| pod_memory_limit | Bytes |
| pod_memory_mapped_file | Bytes |
| pod_memory_max_usage | Bytes |
| pod_memory_pgfault | Count/Second |
| pod_memory_pgmajfault | Count/Second |
| pod_memory_request | Bytes |
| pod_memory_reserved_capacity | Percent |
| pod_memory_rss | Bytes |
| pod_memory_swap | Bytes |
| pod_memory_usage | Bytes |
| pod_memory_utilization | Percent |
| pod_memory_utilization_over_pod_limit | Percent |
| pod_memory_working_set | Bytes |
| pod_network_rx_bytes | Bytes/Second |
| pod_network_rx_dropped | Count/Second |
| pod_network_rx_errors | Count/Second |
| pod_network_rx_packets | Count/Second |
| pod_network_total_bytes | Bytes/Second |
| pod_network_tx_bytes | Bytes/Second |
| pod_network_tx_dropped | Count/Second |
| pod_network_tx_errors | Count/Second |
| pod_network_tx_packets | Count/Second |
| pod_number_of_container_restarts | Count |
| pod_number_of_containers | Count |
| pod_number_of_running_containers | Count |
| pod_status_ready | Count |
| pod_status_scheduled | Count |
| pod_status_unknown | Count |
| pod_status_failed | Count |
| pod_status_pending | Count |
| pod_status_running | Count |
| pod_status_succeeded | Count |
| Metric | Unit |
|-------------------------------------------------------------------|--------------|
| pod_cpu_limit | Millicore |
| pod_cpu_request | Millicore |
| pod_cpu_reserved_capacity | Percent |
| pod_cpu_usage_system | Millicore |
| pod_cpu_usage_total | Millicore |
| pod_cpu_usage_user | Millicore |
| pod_cpu_utilization | Percent |
| pod_cpu_utilization_over_pod_limit | Percent |
| pod_memory_cache | Bytes |
| pod_memory_failcnt | Count |
| pod_memory_hierarchical_pgfault | Count/Second |
| pod_memory_hierarchical_pgmajfault | Count/Second |
| pod_memory_limit | Bytes |
| pod_memory_mapped_file | Bytes |
| pod_memory_max_usage | Bytes |
| pod_memory_pgfault | Count/Second |
| pod_memory_pgmajfault | Count/Second |
| pod_memory_request | Bytes |
| pod_memory_reserved_capacity | Percent |
| pod_memory_rss | Bytes |
| pod_memory_swap | Bytes |
| pod_memory_usage | Bytes |
| pod_memory_utilization | Percent |
| pod_memory_utilization_over_pod_limit | Percent |
| pod_memory_working_set | Bytes |
| pod_network_rx_bytes | Bytes/Second |
| pod_network_rx_dropped | Count/Second |
| pod_network_rx_errors | Count/Second |
| pod_network_rx_packets | Count/Second |
| pod_network_total_bytes | Bytes/Second |
| pod_network_tx_bytes | Bytes/Second |
| pod_network_tx_dropped | Count/Second |
| pod_network_tx_errors | Count/Second |
| pod_network_tx_packets | Count/Second |
| pod_number_of_container_restarts | Count |
| pod_number_of_containers | Count |
| pod_number_of_running_containers | Count |
| pod_status_ready | Count |
| pod_status_scheduled | Count |
| pod_status_unknown | Count |
| pod_status_failed | Count |
| pod_status_pending | Count |
| pod_status_running | Count |
| pod_status_succeeded | Count |
| pod_container_status_waiting_reason_crash_loop_back_off | Count |
| pod_container_status_waiting_reason_image_pull_error | Count |
| pod_container_status_waiting_reason_start_error | Count |
| pod_container_status_waiting_reason_create_container_error | Count |
| pod_container_status_waiting_reason_create_container_config_error | Count |
| pod_container_status_terminated_reason_oom_killed | Count |

| Resource Attribute |
|----------------------|
Expand Down Expand Up @@ -819,12 +825,6 @@ kubectl apply -f config.yaml
| container_status_running | Count |
| container_status_terminated | Count |
| container_status_waiting | Count |
| container_status_waiting_reason_crash_loop_back_off | Count |
| container_status_waiting_reason_image_pull_error | Count |
| container_status_waiting_reason_start_error | Count |
| container_status_waiting_reason_create_container_error | Count |
| container_status_waiting_reason_create_container_config_error | Count |
| container_status_terminated_reason_oom_killed | Count |

<br/><br/>

Expand Down
74 changes: 50 additions & 24 deletions receiver/awscontainerinsightreceiver/internal/stores/podstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -497,12 +497,14 @@ func (p *PodStore) addStatus(metric CIMetric, pod *corev1.Pod) {
if p.includeEnhancedMetrics {
p.addPodStatusMetrics(metric, pod)
p.addPodConditionMetrics(metric, pod)
p.addPodContainerStatusMetrics(metric, pod)
}

var curContainerRestarts int
for _, containerStatus := range pod.Status.ContainerStatuses {
curContainerRestarts += int(containerStatus.RestartCount)
}

podKey := createPodKeyFromMetric(metric)
if podKey != "" {
content, ok := p.getPrevMeasurement(ci.TypePod, podKey)
Expand All @@ -520,38 +522,23 @@ func (p *PodStore) addStatus(metric CIMetric, pod *corev1.Pod) {
if containerName := metric.GetTag(ci.ContainerNamekey); containerName != "" {
for _, containerStatus := range pod.Status.ContainerStatuses {
if containerStatus.Name == containerName {
possibleStatuses := map[string]int{
ci.StatusRunning: 0,
ci.StatusWaiting: 0,
ci.StatusTerminated: 0,
}
switch {
case containerStatus.State.Running != nil:
metric.AddTag(ci.ContainerStatus, "Running")
possibleStatuses[ci.StatusRunning] = 1
case containerStatus.State.Waiting != nil:
metric.AddTag(ci.ContainerStatus, "Waiting")
possibleStatuses[ci.StatusWaiting] = 1
reason := containerStatus.State.Waiting.Reason
if reason != "" {
metric.AddTag(ci.ContainerStatusReason, reason)
if val, ok := ci.WaitingReasonLookup[reason]; ok {
possibleStatuses[val] = 1
}
if containerStatus.State.Waiting.Reason != "" {
metric.AddTag(ci.ContainerStatusReason, containerStatus.State.Waiting.Reason)
}
case containerStatus.State.Terminated != nil:
metric.AddTag(ci.ContainerStatus, "Terminated")
possibleStatuses[ci.StatusTerminated] = 1
if containerStatus.State.Terminated.Reason != "" {
metric.AddTag(ci.ContainerStatusReason, containerStatus.State.Terminated.Reason)
}
}

if containerStatus.LastTerminationState.Terminated != nil && containerStatus.LastTerminationState.Terminated.Reason != "" {
metric.AddTag(ci.ContainerLastTerminationReason, containerStatus.LastTerminationState.Terminated.Reason)
if strings.Contains(containerStatus.LastTerminationState.Terminated.Reason, "OOMKilled") {
possibleStatuses[ci.StatusTerminatedReasonOOMKilled] = 1
}
}
containerKey := createContainerKeyFromMetric(metric)
if containerKey != "" {
Expand All @@ -566,13 +553,6 @@ func (p *PodStore) addStatus(metric CIMetric, pod *corev1.Pod) {
}
p.setPrevMeasurement(ci.TypeContainer, containerKey, prevContainerMeasurement{restarts: int(containerStatus.RestartCount)})
}

// add container containerStatus metrics
if p.includeEnhancedMetrics {
for name, val := range possibleStatuses {
metric.AddField(ci.MetricName(ci.TypeContainer, name), val)
}
}
}
}
}
Expand Down Expand Up @@ -613,6 +593,52 @@ func (p *PodStore) addPodConditionMetrics(metric CIMetric, pod *corev1.Pod) {
}
}

// addPodContainerStatusMetrics aggregates the states of all containers in the
// pod and emits them as pod_container_* count fields on the given metric:
// always running/waiting/terminated totals, plus reason-specific counters
// (known waiting reasons via ci.WaitingReasonLookup, and OOMKilled
// terminations detected from the last termination state).
//
// Running/Waiting/Terminated are pre-seeded to 0 so those fields are emitted
// even when no container is in that state; reason-specific counters are only
// emitted once observed.
func (p *PodStore) addPodContainerStatusMetrics(metric CIMetric, pod *corev1.Pod) {
	possibleStatuses := map[string]int{
		ci.StatusRunning:    0,
		ci.StatusWaiting:    0,
		ci.StatusTerminated: 0,
	}
	for _, containerStatus := range pod.Status.ContainerStatuses {
		switch {
		case containerStatus.State.Running != nil:
			possibleStatuses[ci.StatusRunning]++
		case containerStatus.State.Waiting != nil:
			possibleStatuses[ci.StatusWaiting]++
			// Only waiting reasons with a known metric name are counted.
			// Incrementing a missing map key starts from the zero value, so
			// no existence check is needed.
			if val, ok := ci.WaitingReasonLookup[containerStatus.State.Waiting.Reason]; ok {
				possibleStatuses[val]++
			}
		case containerStatus.State.Terminated != nil:
			possibleStatuses[ci.StatusTerminated]++
			// NOTE(review): this tags the pod-level metric with the reason of
			// whichever terminated container is iterated last — confirm that
			// is intended now that this runs once per pod rather than once
			// per container.
			if containerStatus.State.Terminated.Reason != "" {
				metric.AddTag(ci.ContainerStatusReason, containerStatus.State.Terminated.Reason)
			}
		}

		// OOM kills are detected from the last termination state so that
		// short-lived (already restarted) containers are still counted.
		if lastTerm := containerStatus.LastTerminationState.Terminated; lastTerm != nil &&
			strings.Contains(lastTerm.Reason, "OOMKilled") {
			possibleStatuses[ci.StatusTerminatedReasonOOMKilled]++
		}
	}

	for name, val := range possibleStatuses {
		// Prefix with "container_" so fields come out as pod_container_*.
		metric.AddField(ci.MetricName(ci.TypePod, "container_"+name), val)
	}
}

// It could be used to get limit/request(depend on the passed-in fn) per pod
// return the sum of ResourceSetting and a bool which indicate whether all container set Resource
func getResourceSettingForPod(pod *corev1.Pod, bound uint64, resource corev1.ResourceName, fn func(resource corev1.ResourceName, spec corev1.Container) (uint64, bool)) (uint64, bool) {
Expand Down
Loading

0 comments on commit 6abf868

Please sign in to comment.