diff --git a/Makefile b/Makefile index 87c19a4f6b..198f4fe5ab 100644 --- a/Makefile +++ b/Makefile @@ -95,6 +95,12 @@ config-downloader: copy-version-file $(WIN_BUILD)/config-downloader.exe github.com/aws/amazon-cloudwatch-agent/cmd/config-downloader $(DARWIN_BUILD)/config-downloader github.com/aws/amazon-cloudwatch-agent/cmd/config-downloader +# A fast build that only builds amd64, we don't need wizard and config downloader +build-for-docker: + $(LINUX_AMD64_BUILD)/amazon-cloudwatch-agent github.com/aws/amazon-cloudwatch-agent/cmd/amazon-cloudwatch-agent + $(LINUX_AMD64_BUILD)/start-amazon-cloudwatch-agent github.com/aws/amazon-cloudwatch-agent/cmd/start-amazon-cloudwatch-agent + $(LINUX_AMD64_BUILD)/config-translator github.com/aws/amazon-cloudwatch-agent/cmd/config-translator + fmt: go fmt ./... diff --git a/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/combination/combination.yaml b/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/combination/combination.yaml index 0d598cd3fd..d6bd40bc88 100644 --- a/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/combination/combination.yaml +++ b/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/combination/combination.yaml @@ -158,6 +158,9 @@ spec: - name: varlibdocker mountPath: /var/lib/docker readOnly: true + - name: containerdsock + mountPath: /run/containerd/containerd.sock + readOnly: true - name: sys mountPath: /sys readOnly: true @@ -177,6 +180,9 @@ spec: - name: varlibdocker hostPath: path: /var/lib/docker + - name: containerdsock + hostPath: + path: /run/containerd/containerd.sock - name: sys hostPath: path: /sys diff --git a/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/cwagent/cwagent-daemonset.yaml 
b/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/cwagent/cwagent-daemonset.yaml index 43b89300da..0cef4e19cc 100644 --- a/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/cwagent/cwagent-daemonset.yaml +++ b/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/cwagent/cwagent-daemonset.yaml @@ -56,6 +56,9 @@ spec: - name: varlibdocker mountPath: /var/lib/docker readOnly: true + - name: containerdsock + mountPath: /run/containerd/containerd.sock + readOnly: true - name: sys mountPath: /sys readOnly: true @@ -75,6 +78,9 @@ spec: - name: varlibdocker hostPath: path: /var/lib/docker + - name: containerdsock + hostPath: + path: /run/containerd/containerd.sock - name: sys hostPath: path: /sys diff --git a/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/quickstart/cwagent-fluent-bit-quickstart.yaml b/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/quickstart/cwagent-fluent-bit-quickstart.yaml index 4c5059ceab..c1fe2b46d9 100644 --- a/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/quickstart/cwagent-fluent-bit-quickstart.yaml +++ b/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/quickstart/cwagent-fluent-bit-quickstart.yaml @@ -140,6 +140,9 @@ spec: - name: varlibdocker mountPath: /var/lib/docker readOnly: true + - name: containerdsock + mountPath: /run/containerd/containerd.sock + readOnly: true - name: sys mountPath: /sys readOnly: true @@ -159,6 +162,9 @@ spec: - name: varlibdocker hostPath: path: /var/lib/docker + - name: 
containerdsock + hostPath: + path: /run/containerd/containerd.sock - name: sys hostPath: path: /sys diff --git a/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/quickstart/cwagent-fluentd-quickstart.yaml b/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/quickstart/cwagent-fluentd-quickstart.yaml index 9702625ee4..8d9d23cb11 100644 --- a/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/quickstart/cwagent-fluentd-quickstart.yaml +++ b/amazon-cloudwatch-container-insights/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/quickstart/cwagent-fluentd-quickstart.yaml @@ -140,6 +140,9 @@ spec: - name: varlibdocker mountPath: /var/lib/docker readOnly: true + - name: containerdsock + mountPath: /run/containerd/containerd.sock + readOnly: true - name: sys mountPath: /sys readOnly: true @@ -159,6 +162,9 @@ spec: - name: varlibdocker hostPath: path: /var/lib/docker + - name: containerdsock + hostPath: + path: /run/containerd/containerd.sock - name: sys hostPath: path: /sys diff --git a/internal/containerinsightscommon/const.go b/internal/containerinsightscommon/const.go index db8084e33a..0b9dc01672 100644 --- a/internal/containerinsightscommon/const.go +++ b/internal/containerinsightscommon/const.go @@ -92,4 +92,7 @@ const ( TypeContainer = "Container" TypeContainerFS = "ContainerFS" TypeContainerDiskIO = "ContainerDiskIO" + // Special type for pause container, introduced in https://github.com/aws/amazon-cloudwatch-agent/issues/188 + // because containerd does not set the pause container's name to POD like docker does. 
+ TypeInfraContainer = "InfraContainer" ) diff --git a/plugins/inputs/cadvisor/container_info_processor.go b/plugins/inputs/cadvisor/container_info_processor.go index 2e8d0741dc..4b30a58909 100644 --- a/plugins/inputs/cadvisor/container_info_processor.go +++ b/plugins/inputs/cadvisor/container_info_processor.go @@ -67,9 +67,11 @@ func processContainers(cInfos []*cinfo.ContainerInfo, detailMode bool, container metrics = append(metrics, processPod(cInfo, podKeys)...) } } - // This happens when our cgroup path based pod detection logic is not working. + // This happens when our cgroup path and label based pod detection logic is not working. + // containerd https://github.com/aws/amazon-cloudwatch-agent/issues/188 + // docker systemd https://github.com/aws/amazon-cloudwatch-agent/pull/171 if len(metrics) == beforePod { - log.Printf("W! No pod metric collected, metrics count is still %d", beforePod) + log.Printf("W! No pod metric collected, metrics count is still %d is containerd socket mounted? https://github.com/aws/amazon-cloudwatch-agent/issues/188", beforePod) } metrics = mergeMetrics(metrics) @@ -98,17 +100,20 @@ func processContainer(info *cinfo.ContainerInfo, detailMode bool, containerOrche if !detailMode { return result, pKey } + // Only a container has all these three labels set. containerName := info.Spec.Labels[containerNameLable] namespace := info.Spec.Labels[namespaceLable] podName := info.Spec.Labels[podNameLable] podId := info.Spec.Labels[podIdLable] - if containerName == "" || namespace == "" || podName == "" { + // NOTE: containerName can be empty for pause container on containerd + // https://github.com/containerd/cri/issues/922#issuecomment-423729537 + if namespace == "" || podName == "" { return result, pKey } // Pod's cgroup path is parent for a container. 
- // contianer name: /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod04d39715_075e_4c7c_b128_67f7897c05b7.slice/docker-57b3dabd69b94beb462244a0c15c244b509adad0940cdcc67ca079b8208ec1f2.scope + // container name: /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod04d39715_075e_4c7c_b128_67f7897c05b7.slice/docker-57b3dabd69b94beb462244a0c15c244b509adad0940cdcc67ca079b8208ec1f2.scope // pod name: /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod04d39715_075e_4c7c_b128_67f7897c05b7.slice/ podPath := path.Dir(info.Name) pKey = &podKey{cgroupPath: podPath, podName: podName, podId: podId, namespace: namespace} @@ -116,14 +121,23 @@ func processContainer(info *cinfo.ContainerInfo, detailMode bool, containerOrche tags[PodIdKey] = podId tags[K8sPodNameKey] = podName tags[K8sNamespace] = namespace - if containerName != infraContainerName { + + switch containerName { + // For docker, pause container name is set to POD while containerd does not set it. + // See https://github.com/aws/amazon-cloudwatch-agent/issues/188 + case "", infraContainerName: + // NOTE: the pod here is only used by NetMetricExtractor, + // other pod info like CPU, Mem are dealt within in processPod. + containerType = TypeInfraContainer + default: tags[ContainerNamekey] = containerName tags[ContainerIdkey] = path.Base(info.Name) containerType = TypeContainer - } else { - // NOTE: the pod here is only used by NetMetricExtractor, - // other pod info like CPU, Mem are dealt within in processPod. - containerType = TypePod + + // TODO(pingleig): wait for upstream fix https://github.com/aws/amazon-cloudwatch-agent/issues/192 + if !info.Spec.HasFilesystem { + log.Printf("D! 
containerd does not have container filesystem metrics from cadvisor, See https://github.com/aws/amazon-cloudwatch-agent/issues/192") + } } } else { containerType = TypeNode @@ -146,6 +160,10 @@ func processContainer(info *cinfo.ContainerInfo, detailMode bool, containerOrche return result, pKey } +// processPod is almost identical to processContainer. We need this second loop because pod detection relies +// on inspecting labels from containers in processContainer. cgroup paths for detected pods are saved in podKeys. +// We may not see a container before its pod when looping over all returned cgroup paths, so we use a two-pass solution +// in processContainers. func processPod(info *cinfo.ContainerInfo, podKeys map[string]podKey) []*extractors.CAdvisorMetric { var result []*extractors.CAdvisorMetric if isContainerInContainer(info.Name) { @@ -153,8 +171,8 @@ func processPod(info *cinfo.ContainerInfo, podKeys map[string]podKey) []*extract return result } - podKey := getPodKey(info, podKeys) - if podKey == nil { + podKey, ok := podKeys[info.Name] + if !ok { return result } @@ -177,16 +195,6 @@ func processPod(info *cinfo.ContainerInfo, podKeys map[string]podKey) []*extract return result } -func getPodKey(info *cinfo.ContainerInfo, podKeys map[string]podKey) *podKey { - key := info.Name - - if v, ok := podKeys[key]; ok { - return &v - } - - return nil -} - // Check if it's a container running inside container, caller will drop the metric when return value is true. // The validation is based on ContainerReference.Name, which is essentially cgroup path. 
// The first version is from https://github.com/aws/amazon-cloudwatch-agent/commit/e8daa5f5926c5a5f38e0ceb746c141be463e11e4#diff-599185154c116b295172b56311729990d20672f6659500870997c018ce072100 diff --git a/plugins/inputs/cadvisor/extractors/cpu_extractor.go b/plugins/inputs/cadvisor/extractors/cpu_extractor.go index 7c3d7cc685..f5c51b211f 100644 --- a/plugins/inputs/cadvisor/extractors/cpu_extractor.go +++ b/plugins/inputs/cadvisor/extractors/cpu_extractor.go @@ -29,7 +29,8 @@ func (c *CpuMetricExtractor) recordPreviousInfo(info *cInfo.ContainerInfo) { func (c *CpuMetricExtractor) GetValue(info *cInfo.ContainerInfo, containerType string) []*CAdvisorMetric { var metrics []*CAdvisorMetric - if info.Spec.Labels[containerNameLable] == infraContainerName { + // Skip infra container and handle node, pod, other containers in pod + if containerType == TypeInfraContainer { return metrics } @@ -41,6 +42,7 @@ func (c *CpuMetricExtractor) GetValue(info *cInfo.ContainerInfo, containerType s if deltaCTimeInNano > MinTimeDiff { metric := newCadvisorMetric(containerType) + metric.cgroupPath = info.Name metric.fields[MetricName(containerType, CpuTotal)] = float64(curStats.Cpu.Usage.Total-preStats.Cpu.Usage.Total) / float64(deltaCTimeInNano) * decimalToMillicores metric.fields[MetricName(containerType, CpuUser)] = float64(curStats.Cpu.Usage.User-preStats.Cpu.Usage.User) / float64(deltaCTimeInNano) * decimalToMillicores @@ -59,6 +61,6 @@ func (c *CpuMetricExtractor) CleanUp(now time.Time) { func NewCpuMetricExtractor() *CpuMetricExtractor { return &CpuMetricExtractor{ - preInfos: mapWithExpiry.NewMapWithExpiry(CleanInteval), + preInfos: mapWithExpiry.NewMapWithExpiry(CleanInterval), } } diff --git a/plugins/inputs/cadvisor/extractors/diskio_extractor.go b/plugins/inputs/cadvisor/extractors/diskio_extractor.go index 8a9529a9f6..887413a8d4 100644 --- a/plugins/inputs/cadvisor/extractors/diskio_extractor.go +++ b/plugins/inputs/cadvisor/extractors/diskio_extractor.go @@ -90,7 +90,7 @@ 
func (d *DiskIOMetricExtractor) CleanUp(now time.Time) { func NewDiskIOMetricExtractor() *DiskIOMetricExtractor { return &DiskIOMetricExtractor{ - preInfos: mapWithExpiry.NewMapWithExpiry(CleanInteval), + preInfos: mapWithExpiry.NewMapWithExpiry(CleanInterval), } } diff --git a/plugins/inputs/cadvisor/extractors/extractor.go b/plugins/inputs/cadvisor/extractors/extractor.go index 5e6b20f094..d076f18e11 100644 --- a/plugins/inputs/cadvisor/extractors/extractor.go +++ b/plugins/inputs/cadvisor/extractors/extractor.go @@ -12,21 +12,28 @@ import ( ) const ( - containerNameLable = "io.kubernetes.container.name" - // TODO: https://github.com/containerd/cri/issues/922#issuecomment-423729537 the container name can be empty on containerd - infraContainerName = "POD" + containerNameLabel = "io.kubernetes.container.name" Metrics = "Metrics" Dimensions = "Dimensions" - CleanInteval = 5 * time.Minute + CleanInterval = 5 * time.Minute ) type MetricExtractor interface { HasValue(*cinfo.ContainerInfo) bool - GetValue(*cinfo.ContainerInfo, string) []*CAdvisorMetric + // GetValue normally applies to the following types: + // containerinsightscommon.TypeContainer + // containerinsightscommon.TypePod + // containerinsightscommon.TypeNode + // and ignores: + // containerinsightscommon.TypeInfraContainer + // The only exception is NetMetricExtractor because pod network metrics come from the infra container (i.e. pause). + // See https://www.ianlewis.org/en/almighty-pause-container + GetValue(info *cinfo.ContainerInfo, containerType string) []*CAdvisorMetric CleanUp(time.Time) } type CAdvisorMetric struct { + cgroupPath string // source of the metric for debugging merge conflict fields map[string]interface{} tags map[string]string metricType string @@ -68,7 +75,8 @@ func (c *CAdvisorMetric) Merge(src *CAdvisorMetric) { // If there is any conflict, keep the fields with earlier timestamp for k, v := range src.fields { if _, ok := c.fields[k]; ok { - log.Printf("D! 
metric being merged has conflict in fields, src: %v, dest: %v \n", *src, *c) + log.Printf("D! metric being merged has conflict in fields, path src: %q, dest: %q", src.cgroupPath, c.cgroupPath) + log.Printf("D! metric being merged has conflict in fields, src: %v, dest: %v", *src, *c) if c.tags[containerinsightscommon.Timestamp] < src.tags[containerinsightscommon.Timestamp] { continue } diff --git a/plugins/inputs/cadvisor/extractors/fs_extractor.go b/plugins/inputs/cadvisor/extractors/fs_extractor.go index ab46604137..e8f70f8cb5 100644 --- a/plugins/inputs/cadvisor/extractors/fs_extractor.go +++ b/plugins/inputs/cadvisor/extractors/fs_extractor.go @@ -20,13 +20,14 @@ type FileSystemMetricExtractor struct { allowListRegexP *regexp.Regexp } +// TODO(pingleig): it is always false for container using containerd https://github.com/aws/amazon-cloudwatch-agent/issues/192 func (f *FileSystemMetricExtractor) HasValue(info *cinfo.ContainerInfo) bool { return info.Spec.HasFilesystem } func (f *FileSystemMetricExtractor) GetValue(info *cinfo.ContainerInfo, containerType string) []*CAdvisorMetric { var metrics []*CAdvisorMetric - if containerType == TypePod || info.Spec.Labels[containerNameLable] == infraContainerName { + if containerType == TypePod || containerType == TypeInfraContainer { return metrics } @@ -58,6 +59,7 @@ func (f *FileSystemMetricExtractor) GetValue(info *cinfo.ContainerInfo, containe metric.fields[MetricName(containerType, FSInodesfree)] = v.InodesFree } + metric.cgroupPath = info.Name metrics = append(metrics, metric) } return metrics diff --git a/plugins/inputs/cadvisor/extractors/mem_extractor.go b/plugins/inputs/cadvisor/extractors/mem_extractor.go index f8efdaf022..7e8be186fa 100644 --- a/plugins/inputs/cadvisor/extractors/mem_extractor.go +++ b/plugins/inputs/cadvisor/extractors/mem_extractor.go @@ -25,11 +25,12 @@ func (m *MemMetricExtractor) HasValue(info *cinfo.ContainerInfo) bool { func (m *MemMetricExtractor) GetValue(info *cinfo.ContainerInfo, 
containerType string) []*CAdvisorMetric { var metrics []*CAdvisorMetric - if info.Spec.Labels[containerNameLable] == infraContainerName { + if containerType == TypeInfraContainer { return metrics } metric := newCadvisorMetric(containerType) + metric.cgroupPath = info.Name curStats := GetStats(info) metric.fields[MetricName(containerType, MemUsage)] = curStats.Memory.Usage @@ -64,6 +65,6 @@ func (m *MemMetricExtractor) CleanUp(now time.Time) { func NewMemMetricExtractor() *MemMetricExtractor { return &MemMetricExtractor{ - preInfos: mapWithExpiry.NewMapWithExpiry(CleanInteval), + preInfos: mapWithExpiry.NewMapWithExpiry(CleanInterval), } } diff --git a/plugins/inputs/cadvisor/extractors/net_extractor.go b/plugins/inputs/cadvisor/extractors/net_extractor.go index 5691f311d5..4cae233111 100644 --- a/plugins/inputs/cadvisor/extractors/net_extractor.go +++ b/plugins/inputs/cadvisor/extractors/net_extractor.go @@ -39,10 +39,14 @@ func (n *NetMetricExtractor) HasValue(info *cinfo.ContainerInfo) bool { func (n *NetMetricExtractor) GetValue(info *cinfo.ContainerInfo, containerType string) []*CAdvisorMetric { var metrics []*CAdvisorMetric - // Just a protection here, there is no Container level Net metrics - if (containerType == TypePod && info.Spec.Labels[containerNameLable] != infraContainerName) || containerType == TypeContainer { + // Ignore both pod and container because the network metrics come from InfraContainer. 
+ if containerType == TypePod || containerType == TypeContainer { return metrics } + // Rename type to pod so the metric name prefix is pod_ + if containerType == TypeInfraContainer { + containerType = TypePod + } if preInfo, ok := n.preInfos.Get(info.Name); ok { curStats := GetStats(info) @@ -86,6 +90,7 @@ func (n *NetMetricExtractor) GetValue(info *cinfo.ContainerInfo, containerType s metric.fields[MetricName(mType, k)] = v } + metric.cgroupPath = info.Name metrics = append(metrics, metric) break } @@ -97,6 +102,7 @@ func (n *NetMetricExtractor) GetValue(info *cinfo.ContainerInfo, containerType s for k, v := range aggregatedFields { metric.fields[MetricName(containerType, k)] = v } + metric.cgroupPath = info.Name metrics = append(metrics, metric) } } @@ -112,7 +118,7 @@ func (n *NetMetricExtractor) CleanUp(now time.Time) { func NewNetMetricExtractor() *NetMetricExtractor { return &NetMetricExtractor{ - preInfos: mapWithExpiry.NewMapWithExpiry(CleanInteval), + preInfos: mapWithExpiry.NewMapWithExpiry(CleanInterval), } } diff --git a/plugins/inputs/cadvisor/merger.go b/plugins/inputs/cadvisor/merger.go index 8d0a1e8145..f2bf9b751e 100644 --- a/plugins/inputs/cadvisor/merger.go +++ b/plugins/inputs/cadvisor/merger.go @@ -5,6 +5,7 @@ package cadvisor import ( "fmt" + . "github.com/aws/amazon-cloudwatch-agent/internal/containerinsightscommon" "github.com/aws/amazon-cloudwatch-agent/plugins/inputs/cadvisor/extractors" )